Chinaunix首页 | 论坛 | 博客
  • 博客访问: 349052
  • 博文数量: 60
  • 博客积分: 1570
  • 博客等级: 上尉
  • 技术积分: 620
  • 用 户 组: 普通用户
  • 注册时间: 2009-11-02 23:37
文章分类

全部博文(60)

文章存档

2012年(2)

2010年(2)

2009年(56)

分类: LINUX

2010-04-18 16:19:55

参考文章:
1. TCP Keepalive HOWTO (The Linux Documentation Project)
2. frankzfz.cublog.cn: http://blog.chinaunix.net/u3/104447/showart_2150356.html


一、问题起因:
a》
在文章《TCP Keepalive HOWTO》中写到:
There are also three other socket options you can set for keepalive when you write your application.
They all use the SOL_TCP  level instead of SOL_SOCKET, and they override system-wide variables
only for the current socket. If you read without writing first,
the current system-wide parameters will be returned.

TCP_KEEPCNT:     overrides tcp_keepalive_probes
TCP_KEEPIDLE:     overrides tcp_keepalive_time
TCP_KEEPINTVL:     overrides tcp_keepalive_intvl

上面说:设置 socket 的keepalive 属性选项, 只对当前socket起作用
-----they override system-wide variables only for the current socket。

b》
可是:《TCP/IP详解 卷1:协议》page 252 中写到:
“一个被人们不断讨论的关于保活选项的问题就是两个小时的空闲时间是否可以改
变。通常他们希望该数值可以小得多,处在分钟的数量级。正如我们在附录E看到的,
这个值通常可以改变,但是在该附录所描述的所有系统中,保活间隔时间是系统级的
变量,因此改变它会影响到所有使用该功能的用户。
Host Requirements RFC提到一个实现可提供保活的功能,但是除非应用程序指明
要这样,否则就不能使用该功能。而且,保活间隔必须是可配置的,但是其默认值必
须不小于两个小时。”

我的理解是:这里两处的描述,有些矛盾。
到底 keepalive 属性是只对单个socket起作用,还是会影响其他socket?
为此,用下面的程序来测试。

在下面的程序中,在服务器端使用TCP保活选项,客户端没有使用。
客户端也可以使用这个选项,但通常都是服务器设置这个功能。
如果双方都特别需要了解对方是否已经消失,则双方都可以使用这个选项。

这个程序写的比较简单:
没有考虑服务器效率,只是为了测试 keepalive 属性。
错误处理也都非常简单。

说明:本文只是测试客户主机崩溃或者网络异常断开,比如拔掉网线 或 中间路由器崩溃等情况。
正常关机或重启本文不予考虑。

思路:
在程序中表现为,当tcp检测到对端socket不再可用时(TCP发出保活探查报文,但是没有收到客户端对其保活探查报文的响应), select会返回socket可读, 并且在 recv 或 read 时返回-1,
同时设置errno为 ETIMEDOUT。

服务器端

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/select.h>
#include <sys/socket.h>
#include <sys/types.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

/* TCP port the server listens on. */
#define SRV_PORT 12345
/* Backlog value passed to listen(). */
#define LISTEN_QUEUE 20

/* Maximum number of simultaneously tracked client connections. */
#define CONN_MAX 1024

/* Descriptors of the connected clients; a slot holding -1 is free. */

int conn[CONN_MAX];

/* Number of clients currently connected. */
int conn_amount;

int init_socket_tcp(unsigned short port, int backlog);
void active_keepalive(int sockfd);
void set_keepalive_params(int sockfd, int idle, int count, int intvl);

/*
 * Non-zero until the first client has been accepted; main() uses it to give
 * the first client different keepalive parameters from all later clients.
 */
static int firstclient = 1;

int main(int argc, char *argv[])
{
    int sockfd, cltfd = -1;
    struct sockaddr_in clt_addr;
    socklen_t addrlen;
    
    int i, n;
    char buf[1024];
    int len;

    int maxfd;
    fd_set readfds;

    sockfd = init_socket_tcp(SRV_PORT, LISTEN_QUEUE);
    if (sockfd < 0) {
        perror("init socket failed!");
        exit(EXIT_FAILURE);
    }

    for (i = 0; i < CONN_MAX; i++) {
            conn[i] = -1;
    }

    maxfd = sockfd;
    conn_amount = 0;

    while (1) {
        FD_ZERO(&readfds);
        FD_SET(sockfd, &readfds);
    
        for (i = 0; i < CONN_MAX; i++) {
            if (conn[i] != -1) {
                FD_SET(conn[i], &readfds);
                if (conn[i] > maxfd)
                    maxfd = conn[i];
            }
        }

        printf("start select............\n");
        if ((n = select(maxfd + 1, &readfds, NULL, NULL, NULL)) < 0) {
            perror("select faield");
            continue;
        }
        
        for (i = 0; i < CONN_MAX; i++) {
            if ((conn[i] != -1) && FD_ISSET(conn[i], &readfds)) {
                len = read(conn[i], buf, sizeof(buf) - 1);
                if (len < 0) {
                    if (errno == EINTR) {
                        printf("errno == EINTR\n");
                        continue;
                    }
                    if (errno == ETIMEDOUT)
                        printf("ETIMEDOUT..............., client: %d\n", i + 1);
                    perror("read error");
                    close(conn[i]);
                    conn[i] = -1;
                    conn_amount--;
                } else if (len == 0) {
                        printf("client close socket.\n");
                        close(conn[i]);
                        conn[i] = -1;
                        conn_amount--;
                } else
                    printf("read from client: %s\n", buf);
            }
        } /* for (i = 0; i < CONN_MAX; i++)*/

        if (FD_ISSET(sockfd, &readfds)) {
            addrlen = sizeof(struct sockaddr_in);
            cltfd = accept(sockfd, (struct sockaddr *)&clt_addr, &addrlen);
            if (cltfd < 0) {
                perror("accept failed");
                continue;
            }
        
            if (conn_amount < CONN_MAX) {
                conn_amount++;
                printf("*********** conn_amount = %d\n", conn_amount);
                
                /* set tcp_keepalive_* options */
                /* 设置第一个客户端socket keepalive 属性 */

                if (firstclient) {

                    /* 开启 keepalive 选项 */
                    active_keepalive(cltfd);
                    /* 设置 keepalive 相关参数 */

                    set_keepalive_params(cltfd, 60, 3, 2);
                    firstclient = 0;
                } else {
/* 设置其他客户端socket keepalive 属性 */

                    /* 开启 keepalive 选项 */

                    active_keepalive(cltfd);

                    /* 设置 keepalive 相关参数 */
                    set_keepalive_params(cltfd, 20, 3, 5);
                }

                for (i = 0; i < CONN_MAX; i++) {
                    if (conn[i] == -1) {
                        conn[i] = cltfd;
                        break;
                    }
                }
            } else { /* 到达 CONN_MAX后,不处理其他客户端连接请求 */
                printf("max connection arrived, I will close the client socket!\n");
                close(cltfd);
            }
        }

    } /* while (1) */

    close(sockfd);
    exit(EXIT_SUCCESS);
}

/*
 * Create a TCP/IPv4 socket, enable SO_REUSEADDR on it, bind it to
 * INADDR_ANY:port and put it into the listening state.
 *
 * port    - local port in host byte order
 * backlog - value passed to listen()
 *
 * Returns the listening descriptor on success.  On failure a message is
 * printed with perror(), the socket (if created) is closed, and -1 is
 * returned.
 */
int init_socket_tcp(unsigned short port, int backlog)
{
    struct sockaddr_in local;
    int on = 1;
    int fd;

    fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (fd < 0) {
        perror("create socket failed!");
        return -1;
    }

    memset(&local, 0, sizeof(local));
    local.sin_family = AF_INET;
    local.sin_port = htons(port);
    local.sin_addr.s_addr = htonl(INADDR_ANY);

    if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) {
        perror("setsockopt SO_REUSEADDR");
        goto fail;
    }

    if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
        perror("bind errror");
        goto fail;
    }

    if (listen(fd, backlog) < 0) {
        perror("listen error");
        goto fail;
    }

    return fd;

fail:
    /* Every failure after socket() releases the descriptor the same way. */
    close(fd);
    return -1;
}

/*
 * Turn on SO_KEEPALIVE for sockfd.
 *
 * The current state of the option is printed before the change and again
 * afterwards, so the effect of the setsockopt() call is visible on stdout.
 *
 * If any getsockopt()/setsockopt() call fails, a diagnostic is printed,
 * sockfd is closed and the whole process exits with EXIT_FAILURE.
 */
void active_keepalive(int sockfd)
{
    int optval = 0;
    socklen_t optlen = sizeof(optval);

    /* Report the state of the option before touching it. */
    if (getsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, &optval, &optlen) < 0) {
        perror("getsockopt SO_KEEPALIVE failed");
        close(sockfd);
        exit(EXIT_FAILURE);
    }
    printf("SO_KEEPALIVE is %s\n", optval ? "ON" : "OFF");

    /* Enable the option. */
    optval = 1;
    if (setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, &optval, sizeof(optval)) < 0) {
        /* strerror(errno) instead of the glibc-only "%m" conversion. */
        printf("setsockopt SO_KEEPALIVE failed,reason: %s\n", strerror(errno));
        close(sockfd);
        exit(EXIT_FAILURE);
    }
    printf("SO_KEEPALIVE on socket\n");

    /* Read the option back; optlen is value-result, so reset it first. */
    optlen = sizeof(optval);
    if (getsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, &optval, &optlen) < 0) {
        perror("getsockopt SO_KEEPALIVE again failed");
        close(sockfd);
        exit(EXIT_FAILURE);
    }
    printf("SO_KEEPALIVE is %s\n", optval ? "ON" : "OFF");
}

/*
 * Override the system-wide keepalive settings for one socket.
 *
 * sockfd - TCP socket to configure
 * idle   - TCP_KEEPIDLE value  (overrides tcp_keepalive_time)
 * count  - TCP_KEEPCNT value   (overrides tcp_keepalive_probes)
 * intvl  - TCP_KEEPINTVL value (overrides tcp_keepalive_intvl)
 *
 * The options are applied in the order idle, count, interval.  On the first
 * failure a message is printed with perror() and the function returns
 * without applying the remaining options; no status is reported to the
 * caller.
 *
 * IPPROTO_TCP is used as the option level rather than the Linux-specific
 * SOL_TCP alias.
 */
void set_keepalive_params(int sockfd, int idle, int count, int intvl)
{
    if (setsockopt(sockfd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle)) < 0) {
        perror("TCP_KEEPIDLE failed");
        return;
    }

    if (setsockopt(sockfd, IPPROTO_TCP, TCP_KEEPCNT, &count, sizeof(count)) < 0) {
        perror("TCP_KEEPCNT failed");
        return;
    }

    if (setsockopt(sockfd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl)) < 0) {
        perror("TCP_KEEPINTVL failed");
        return;
    }
}




客户端:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <netinet/in.h>

/* IPv4 address of the server the client connects to. */
#define SRV_IP "192.168.1.4"
/* TCP port of the server; same value as SRV_PORT in the server listing. */
#define SRV_PORT 12345

/* Print msg followed by the errno description, then exit with EXIT_FAILURE. */
#define errexit(msg) do {perror(msg); exit(EXIT_FAILURE);} while(0)

/*
 * Keepalive test client.
 *
 * Connects to SRV_IP:SRV_PORT and then blocks in pause() without sending
 * anything, so the only traffic on the connection is whatever the server's
 * TCP stack generates.  Any socket()/connect() failure ends the program via
 * errexit().
 *
 * NOTE(review): inet_addr() is declared in <arpa/inet.h> and pause()/close()
 * in <unistd.h>; neither header is included by this client listing.
 */
int main(int argc, char *argv[])
{
    struct sockaddr_in server;
    int fd = socket(AF_INET, SOCK_STREAM, 0);

    if (fd < 0)
        errexit("socket");

    memset(&server, 0, sizeof(server));
    server.sin_family = AF_INET;
    server.sin_port = htons(SRV_PORT);
    server.sin_addr.s_addr = inet_addr(SRV_IP);

    if (connect(fd, (struct sockaddr *)&server, sizeof(server)) != 0)
        errexit("connect");

    printf("connect to server succeeded!\n");

    /* Stay connected and idle until a signal arrives. */
    pause();

    close(fd);
    exit(0);
}



测试过程简单描述:
1.启动服务器端程序
2.启动 2 个客户端
3.在2个客户端都成功连接服务器后,断开服务器主机的网线
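4.似乎在 20+/-10 秒后,成功检测出客户端 2 断开了,然后似乎再经过 30+/- 10 秒左右, 检测到客户端 1 断开连接。(自己口头数的秒,具体时间很不准确,但不影响结论  )。
(按参数推算:检测耗时约为 idle + count × intvl,即客户端 2 约 20 + 3×5 = 35 秒,客户端 1 约 60 + 3×2 = 66 秒,两者相差约 31 秒,与观察到的先后顺序和间隔大致相符。)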

最后的结论:
they override system-wide variables only for the current socket 应该是对的。
也就是,keepalive 属性只对单个socket 起作用,
对其他 socket 的 keepalive 属性没有影响.


阅读(5724) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~