linux学习记录
分类:
2010-09-27 17:13:15
集群心跳可在文件/etc/ha.d/ha.cf中进行配置,如下所示:
node server-1 server-2
bcast eth0
ucast eth1 192.168.127.132
心跳有如下四种类型:串口(serial)、广播(bcast)、多播(mcast)、单播(ucast),以下分别进行介绍:
#serial 串口名称
#serial /dev/ttyS0 # Linux
#serial /dev/cuaa0 # FreeBSD
#serial /dev/cuad0 # FreeBSD 6.x
#serial /dev/cua/a # Solaris
#广播heartbeats的接口
#bcast eth0 # Linux
#bcast eth1 eth2 # Linux
#bcast le0 # Solaris
#bcast le1 le2 # Solaris
# 设置一个多播心跳介质
# mcast [dev] [mcast group] [port] [ttl] [loop]
#[dev]发送/接收heartbeats的设备
#[mcast group]加入到的多播组(D类多播地址224.0.0.0 - 239.255.255.255)
#[port]用于发送/接收udp包的端口(应与ha.cf中udpport的设置值相同)
#[ttl]发出的heartbeat包的ttl值,它决定多播包能传播多远(0-255),必须大于0。
#[loop]打开或关闭多播heartbeat的loopback(0或者1)。如果启用,发出的包将被回环,并由发送它的接口接收。应将该值设置为0。
#mcast eth0 225.0.0.1 694 1 0
#配置一个unicast / udp heartbeat 介质
#ucast [dev] [peer-ip-addr]
#[dev]用于发送/接收heartbeat的设备
#[peer-ip-addr]包被发送到的对等的IP地址
#ucast eth0 192.168.1.2
在heartbeat中,可以使用命令cl_status来查询集群心跳的相关信息。
显示节点server-1所使用的心跳:
[root@server-1 bin]# ./cl_status listhblinks server-1
eth1 eth0 |
显示节点server-1的eth0心跳状态:
[root@server-1 bin]# ./cl_status hblinkstatus server-1 eth0
up |
为了在mgmt下可以检查心跳状态,在mgmt/daemon/mgmt_hb.c中添加如下函数:
char*
on_status_of_hblinks(char* argv[], int argc) { const char * intf; const char * if_status; const char* name = NULL; char* ret = cl_strdup(MSG_OK); if (hb->llc_ops->init_nodewalk(hb) != HA_OK) { mgmt_log(LOG_ERR, "Cannot start node walk"); mgmt_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); cl_free(ret); return cl_strdup(MSG_FAIL); } while((name = hb->llc_ops->nextnode(hb))!= NULL) { if (strcmp(hb->llc_ops->node_type(hb,name), "ping") == 0) continue; if (strcmp(get_localnodeinfo(), name) == 0) continue; if (hb->llc_ops->init_ifwalk(hb, name) != HA_OK) { cl_log(LOG_ERR, "Cannot start heartbeat link interface walk."); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); return cl_strdup(MSG_FAIL"\nCannot start heartbeat link interface walk"); } while ((intf = hb->llc_ops->nextif(hb))) { if_status = hb->llc_ops->if_status(hb, name, intf); if (if_status == NULL) { /* Should be error ? */ cl_log(LOG_ERR, "Cannot get heartbeat link status"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); return cl_strdup(MSG_FAIL"\nCannot get heartbeat link status"); } if ( STRNCMP_CONST(if_status, "dead") == 0 ) { return cl_strdup(MSG_OK"\nhas_dead"); } } if (hb->llc_ops->end_ifwalk(hb) != HA_OK) { cl_log(LOG_ERR, "Cannot end heartbeat link interface walk"); cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); return cl_strdup(MSG_FAIL"\nCannot end heartbeat link interface walk"); } } if (hb->llc_ops->end_nodewalk(hb) != HA_OK) { mgmt_log(LOG_ERR, "Cannot end node walk"); mgmt_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb)); cl_free(ret); return cl_strdup(MSG_FAIL); } return cl_strdup(MSG_OK"\ngood"); } |
之后即可在mgmt中使用status_hblinks命令查询心跳的状态,如:
[root@server-1 heartbeat-gui]# ./mgmtcmd.py status_hblinks
--------------------------- ok has_dead |