在使用pacemaker维护的集群时,经常使用pcs 命令查询集群的状态,如:
pcs status 查询集群下各主机的状态
Cluster name: 111
Last updated: Tue Mar 8 10:37:39 2016
Last change: Mon Mar 7 14:22:04 2016 via crmd on cvm1457070021
Stack: corosync
Current DC: xxxxxx(2) - partition with quorum
Version: 1.1.10-32.el7.centos.1-368c726
2 Nodes configured
6 Resources configured
Online: [ xxxxxxxxx xxxxxxxxx]
Full list of resources:
Clone Set: dlm-clone [dlm]
Started: [ xxxxxxxxx xxxxxxxxx]
fence192.168.0.221 (stonith:fence_ipmilan): Started xxxxxxxxxxxxx
fence192.168.0.236 (stonith:fence_ipmilan): Started xxxxxxxxxxxxx
Clone Set: gfs2_cluster-clone [gfs2_cluster]
Started: [ xxxxxxxxx xxxxxxxxx]
PCSD Status:
192.168.x.xxx: Online
192.168.x.xxx: Online
Daemon Status:
corosync: active/disabled
pacemaker: active/disabled
pcsd: active/enabled
资源正常时,查询结果会马上返回。但是,当网络资源紧缺或者出现延迟的时候,此命令的返回时间有可能超过10秒,而pacemaker的校验时间低于3秒。如果在集群中
使用此命令进行维护而长时间不返回,会导致集群自动fence。
Linux环境下,我们一般使用popen或者system调用来执行命令,并通过其返回值和输出内容判断调用成功与否。当上述情况发生时,pclose会卡住,导致不能在规定时间内得到正确
的结果。并且由于pclose源码的问题,在多线程操作时,一个线程卡住不能释放资源,会造成所有线程等待。
针对以上问题,经研究源码,将popen、pclose函数修改为mypopen、mypclose,代码如下:
头文件略
/*
 * Bookkeeping record for one stream handed out by mypopen: the stream
 * itself and the child process running behind it.
 */
struct pid {
    struct pid *next;   /* next record in the singly linked list */
    FILE *fp;           /* stream returned to the caller */
    pid_t pid;          /* child process attached to that stream */
};

/* Head of the list of streams opened by mypopen and not yet closed. */
static struct pid *pidlist;

/* Environment of the calling process; passed to execve() by mypopen. */
extern char **environ;
/*
 * mypopen --
 *  Run `program` through the shell ("sh -c program") in a forked child
 *  and return a stdio stream connected to that child by a pipe.
 *
 *  type must be exactly "r" (the stream reads the child's standard
 *  output) or "w" (the stream writes to the child's standard input).
 *
 *  Returns the stream on success and records it in pidlist so that
 *  mypclose can find the child again.  Returns NULL on failure: invalid
 *  arguments set errno to EINVAL, otherwise errno is the value left by
 *  the call that failed (malloc, pipe, fdopen or fork).
 *
 *  pidlist is read and updated here without any locking.
 */
FILE *mypopen(const char *program, const char *type)
{
    struct pid *cur;
    FILE *iop;
    int pdes[2];
    int parent_end, child_end, child_target;
    int saved_errno;
    pid_t pid;
    /* Casts keep this buildable as C++ too, which mypclose's default
     * argument requires; argp[2] is filled in by the child. */
    char *argp[] = {(char *)"sh", (char *)"-c", NULL, NULL};

    if (program == NULL || type == NULL ||
        (*type != 'r' && *type != 'w') || type[1] != '\0') {
        errno = EINVAL;
        return (NULL);
    }

    if ((cur = (struct pid *)malloc(sizeof *cur)) == NULL)
        return (NULL);

    if (pipe(pdes) < 0) {
        saved_errno = errno;
        free(cur);
        errno = saved_errno;
        return (NULL);
    }

    /* Decide which pipe end each side keeps and which standard
     * descriptor the child's end has to replace. */
    if (*type == 'r') {
        parent_end = pdes[0];
        child_end = pdes[1];
        child_target = STDOUT_FILENO;
    } else {
        parent_end = pdes[1];
        child_end = pdes[0];
        child_target = STDIN_FILENO;
    }

    /*
     * Wrap the parent's end before forking: if fdopen fails there is no
     * child yet, so nothing has to be killed or reaped.
     */
    iop = fdopen(parent_end, type);
    if (iop == NULL) {
        saved_errno = errno;
        (void)close(pdes[0]);
        (void)close(pdes[1]);
        free(cur);
        errno = saved_errno;
        return (NULL);
    }

    pid = fork();
    if (pid == -1) {
        saved_errno = errno;
        (void)fclose(iop);          /* also closes parent_end */
        (void)close(child_end);
        free(cur);
        errno = saved_errno;
        return (NULL);
    }

    if (pid == 0) {
        /* Child. */
        struct pid *pcur;

        /*
         * The child has its own copy of the list after fork(); close
         * the pipe ends of earlier mypopen streams so they do not leak
         * into the new command.
         */
        for (pcur = pidlist; pcur; pcur = pcur->next)
            (void)close(fileno(pcur->fp));

        (void)close(parent_end);
        if (child_end != child_target) {
            if (dup2(child_end, child_target) < 0)
                _exit(127);
            (void)close(child_end);
        }

        argp[2] = (char *)program;
        execve(_PATH_BSHELL, argp, environ);
        _exit(127);                 /* only reached if execve failed */
    }

    /* Parent: the child's end of the pipe is not needed here. */
    (void)close(child_end);

    /* Link into list of open streams. */
    cur->fp = iop;
    cur->pid = pid;
    cur->next = pidlist;
    pidlist = cur;
    return (iop);
}
/*
 * mypclose --
 *  Close a stream returned by mypopen and collect its child process.
 *
 *  flag == 0: wait until the child terminates on its own.
 *  flag != 0: send SIGKILL to the child first, so the wait that follows
 *             lasts only until the killed process has been torn down.
 *
 *  Returns the child's wait status as filled in by waitpid().  Returns -1
 *  if iop is NULL, if iop is not a stream currently tracked by mypopen
 *  (never opened by it, or already closed), or if waitpid() fails.
 */
int mypclose(FILE *iop,int flag = 0 /* 0: wait for the child to exit; non-zero: kill the child instead of waiting for it to finish */)
{
    struct pid *cur, *last;
    int pstat = 0;
    pid_t pid;

    if (iop == NULL)
        return (-1);

    /* Find the list entry that belongs to this stream. */
    for (last = NULL, cur = pidlist; cur; last = cur, cur = cur->next)
        if (cur->fp == iop)
            break;
    if (cur == NULL)
        return (-1);

    (void)fclose(iop);

    /*
     * Forced close: signal the recorded child.  The pid must come from
     * the list entry; the local `pid` has no value yet at this point.
     * The > 0 guard makes sure a process group is never signalled.
     */
    if (flag != 0 && cur->pid > 0)
        (void)kill(cur->pid, SIGKILL);

    /* Reap the child in both modes so that no zombie is left behind. */
    do {
        pid = waitpid(cur->pid, &pstat, 0);
    } while (pid == -1 && errno == EINTR);

    /* Remove the entry from the linked list. */
    if (last == NULL)
        pidlist = cur->next;
    else
        last->next = cur->next;
    free(cur);

    return (pid == -1 ? -1 : pstat);
}
为了解决延迟问题,再封装一个函数,控制调用延时,代码如下:
int getResultFromPopenLinShi(const char* pCmd /*输入命令*/, char* pResult /*输出结果*/, int size /*输出结果大小*/, int time /*延迟时间*/)
{
FILE *fp = NULL;
int ret = 0;
char tmpBuf[MAX_BUF_SIZE] = {0};
if(pCmd == NULL || pResult == NULL || size == 0)
{
LogInfo("parameters error!\n");
return -1;
}
char *bufAll = new char[size+1];
memset(bufAll, 0 ,size+1);
if(bufAll == NULL)
{
LogInfo("mem is not enough error!\n");
return -1;
}
fp = mypopen(pCmd, "r");
if(fp == NULL)
{
LogInfo("popen failed");
delete [] bufAll ;
return -1;
}
int iCnt = time;
int iSleepCnt = 0;
int readAllCount = 0;
bool bExitFlag = false;
fcntl(fileno(fp), F_SETFL,O_NONBLOCK);
while( (0 < iCnt))
{
ret = fread(tmpBuf,1,sizeof(tmpBuf)-1,fp);
if(0 == ret)
{
if(0 == mypclose(fp,2))
{
if(true != bExitFlag)
{
bExitFlag = true;
continue;
}
else
{
break;
}
}
sleep(iSleepCnt++);
iCnt--;
continue;
阅读(7630) | 评论(0) | 转发(0) |