Chinaunix首页 | 论坛 | 博客
  • 博客访问: 764486
  • 博文数量: 144
  • 博客积分: 0
  • 博客等级: 民兵
  • 技术积分: 1150
  • 用 户 组: 普通用户
  • 注册时间: 2014-03-17 14:32
个人简介

小公司研发总监,既当司令也当兵!

文章分类

全部博文(144)

分类: LINUX

2015-05-20 10:33:02

1.    重要数据与注册

/*
 * Socket-layer operation table (struct proto_ops) for PF_INET stream sockets.
 *
 * The SOCK_STREAM/IPPROTO_TCP entry of inetsw_array points at this table
 * through its .ops field, and inet_create() copies that pointer into
 * sock->ops. The system calls shown later in this article therefore reach
 * these handlers through sock->ops->bind, ->listen, ->accept and ->connect.
 */
const struct proto_ops inet_stream_ops = {

    .family       = PF_INET,

    .owner     = THIS_MODULE,

    .release      = inet_release,

    .bind         = inet_bind,

    .connect      = inet_stream_connect,

    .socketpair      = sock_no_socketpair,

    .accept        = inet_accept,

    .getname      = inet_getname,

    .poll         = tcp_poll,

    .ioctl        = inet_ioctl,

    .listen          = inet_listen,

    .shutdown     = inet_shutdown,

    .setsockopt      = sock_common_setsockopt,

    .getsockopt      = sock_common_getsockopt,

    .sendmsg      = inet_sendmsg,

    .recvmsg      = inet_recvmsg,

    .mmap         = sock_no_mmap,

    .sendpage     = inet_sendpage,

    .splice_read     = tcp_splice_read,

    /* The compat_* handlers are only compiled in when CONFIG_COMPAT is set. */
#ifdef CONFIG_COMPAT

    .compat_setsockopt = compat_sock_common_setsockopt,

    .compat_getsockopt = compat_sock_common_getsockopt,

    .compat_ioctl    = inet_compat_ioctl,

#endif

};

 

/*
 * Protocol-level operation table (struct proto) for TCP.
 *
 * The SOCK_STREAM entry of inetsw_array points at this table through its
 * .prot field, and inet_create() hands it to sk_alloc(). The socket-layer
 * code in this article then calls into it through sk->sk_prot, for example
 * sk->sk_prot->init, ->get_port, ->hash and ->accept.
 */
struct proto tcp_prot = {

    .name         = "TCP",

    .owner        = THIS_MODULE,

    .close        = tcp_close,

    .connect      = tcp_v4_connect,

    .disconnect       = tcp_disconnect,

    .accept           = inet_csk_accept,

    .ioctl        = tcp_ioctl,

    .init         = tcp_v4_init_sock,

    .destroy      = tcp_v4_destroy_sock,

    .shutdown     = tcp_shutdown,

    .setsockopt       = tcp_setsockopt,

    .getsockopt       = tcp_getsockopt,

    .recvmsg      = tcp_recvmsg,

    .sendmsg      = tcp_sendmsg,

    .sendpage     = tcp_sendpage,

    .backlog_rcv      = tcp_v4_do_rcv,

    .hash         = inet_hash,

    .unhash           = inet_unhash,

    .get_port     = inet_csk_get_port,

    .enter_memory_pressure   = tcp_enter_memory_pressure,

    .sockets_allocated   = &tcp_sockets_allocated,

    .orphan_count     = &tcp_orphan_count,

    .memory_allocated = &tcp_memory_allocated,

    .memory_pressure  = &tcp_memory_pressure,

    .sysctl_mem       = sysctl_tcp_mem,

    .sysctl_wmem      = sysctl_tcp_wmem,

    .sysctl_rmem      = sysctl_tcp_rmem,

    .max_header       = MAX_TCP_HEADER,

    /* Object size is that of struct tcp_sock, not of a bare struct sock. */
    .obj_size     = sizeof(struct tcp_sock),

    .slab_flags       = SLAB_DESTROY_BY_RCU,

    .twsk_prot    = &tcp_timewait_sock_ops,

    .rsk_prot     = &tcp_request_sock_ops,

    .h.hashinfo       = &tcp_hashinfo,

    .no_autobind      = true,

    /* The compat_* handlers are only compiled in when CONFIG_COMPAT is set. */
#ifdef CONFIG_COMPAT

    .compat_setsockopt   = compat_tcp_setsockopt,

    .compat_getsockopt   = compat_tcp_getsockopt,

#endif

};

 

/*
 * Built-in PF_INET protocol switch table. Each entry ties a
 * (socket type, protocol) pair to a protocol-level table (.prot) and a
 * socket-layer table (.ops).
 *
 * inet_init() registers every entry with inet_register_protosw(), and
 * inet_create() later walks inetsw[sock->type] to find the entry matching
 * the requested protocol.
 */
static struct inet_protosw inetsw_array[] =

{

    /* TCP: stream sockets. */
    {

       .type =       SOCK_STREAM,

       .protocol =   IPPROTO_TCP,

       .prot =       &tcp_prot,

       .ops =        &inet_stream_ops,

       .no_check =   0,

       .flags =      INET_PROTOSW_PERMANENT |

                 INET_PROTOSW_ICSK,

    },

 

    /* UDP: datagram sockets. */
    {

       .type =       SOCK_DGRAM,

       .protocol =   IPPROTO_UDP,

       .prot =       &udp_prot,

       .ops =        &inet_dgram_ops,

       .no_check =   UDP_CSUM_DEFAULT,

       .flags =      INET_PROTOSW_PERMANENT,

       },

 

 

       /* Raw sockets: IPPROTO_IP acts as a wildcard protocol here. */
       {

           .type =       SOCK_RAW,

           .protocol =   IPPROTO_IP, /* wild card */

           .prot =       &raw_prot,

           .ops =        &inet_sockraw_ops,

           .no_check =   UDP_CSUM_DEFAULT,

           .flags =      INET_PROTOSW_REUSE,

       }

};

然后在inet_init()中将inetsw_array注册到inetsw中:

/*
 * Excerpt of inet_init(): only the part that fills the inetsw[] lookup
 * lists is shown; the rest of the function (including the declarations of
 * r and q) is elided by the article.
 */
static int __init inet_init(void)

{

......

 

/* Register the socket-side information for inet_create. */

    /* Start with an empty list head for every socket type slot. */
    for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)

       INIT_LIST_HEAD(r);

 

    /* Register each built-in entry of inetsw_array. */
    for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)

       inet_register_protosw(q);

.......

}

 

 

2.    socket()方法

2.1  函数说明

#include <sys/socket.h>

int socket(int family, int type, int protocol)   

返回值说明:返回非负描述字——成功,返回-1——失败

2.2  socket()系统调用

/*
 * socket() system call: create a socket of the given family/type/protocol
 * and return a file descriptor for it, or a negative errno on failure.
 *
 * The type argument carries both the socket type (low bits, selected by
 * SOCK_TYPE_MASK) and optional SOCK_CLOEXEC / SOCK_NONBLOCK flag bits.
 */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)

{

    int retval;

    struct socket *sock;

    int flags;

 

    /* Check the SOCK_* constants for consistency.  */

 

    /* Split type into flag bits and the real socket type; reject unknown flags. */
    flags = type & ~SOCK_TYPE_MASK;

    if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))

       return -EINVAL;

    type &= SOCK_TYPE_MASK;

 

    /*
     * Where SOCK_NONBLOCK and O_NONBLOCK have different values, translate
     * the former into the latter: sock_map_fd() below is given O_* flags.
     */
    if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))

       flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

 

    // Create a socket instance and allocate its sock structure

    retval = sock_create(family, type, protocol, &sock);

    if (retval < 0)

       goto out;

 

    // Associate the socket with a file, attach a file descriptor to it, and return that descriptor

    retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));

    if (retval < 0)

       goto out_release;

 

out:

    /* It may be already another descriptor 8) Not kernel problem. */

    return retval;

 

    /* Mapping to a descriptor failed: release the socket created above. */
out_release:

    sock_release(sock);

    return retval;

}

sock_create是__sock_create的包裹函数:

/*
 * Excerpt of __sock_create(): allocate a struct socket, look up the address
 * family in net_families[] and let the family's create hook finish the setup.
 * On success the new socket is stored in *res and 0 is returned.
 *
 * The article elides parts of the function; the out_release and
 * out_module_put labels used below are in the elided part.
 */
int __sock_create(struct net *net, int family, int type, int protocol,

            struct socket **res, int kern)

{

    int err;

    struct socket *sock;

    const struct net_proto_family *pf;

   

    …… ……..

 

 

    /*

     *  Allocate the socket and allow the family to set things up. if

     *  the protocol is 0, the family is instructed to select

*   an appropriate default.

     */

    sock = sock_alloc();

    if (!sock) {

       if (net_ratelimit())

           printk(KERN_WARNING "socket: no more sockets\n");

       return -ENFILE;   /* Not exactly a match, but its the

                 closest posix thing */

    }

 

    sock->type = type;

 

    rcu_read_lock();

 

    // Fetch the entry registered for this address family; it carries the family's create function

    pf = rcu_dereference(net_families[family]);

    /* No entry registered for this family: fail with -EAFNOSUPPORT. */
    err = -EAFNOSUPPORT;

    if (!pf)

       goto out_release;

 

    /*

     * We will call the ->create function, that possibly is in a loadable

     * module, so we have to bump that loadable module refcnt first.

     */ 
     // Note again: this is the address family's create function; it dispatches on protocol to the specific protocol's create function (for example pppoe under pppox)

err = pf->create(net, sock, protocol, kern);

    if (err < 0)

       goto out_module_put;

 

    …

 

    *res = sock;

 

    return 0;

}

 

net_families中, AF_INET对应的结构数据如下:

/*
 * Address-family record for PF_INET. Its .create hook is inet_create(),
 * which is what __sock_create() reaches through pf->create for this family.
 */
static const struct net_proto_family inet_family_ops = {

    .family = PF_INET,

    .create = inet_create,

    .owner = THIS_MODULE,

};

 

/*
 * Excerpt of inet_create(), the PF_INET create hook: pick the inet_protosw
 * entry matching (sock->type, protocol), install its ops on the socket,
 * allocate the struct sock with the entry's proto, and run the protocol's
 * own init function.
 *
 * The article elides several parts; the "out" label and the final return
 * are not shown.
 */
static int inet_create(struct net *net, struct socket *sock, int protocol,

              int kern)

{

    struct sock *sk;

    struct inet_protosw *answer;

    struct inet_sock *inet;

    struct proto *answer_prot;

    unsigned char answer_flags;

    char answer_no_check;

    int try_loading_module = 0;

    int err;

 

    …

    /*
     * Walk the entries registered for this socket type. err is 0 when the
     * loop is left through one of the breaks, and -EPROTONOSUPPORT when the
     * list is exhausted without a match.
     */
    list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {

 

       err = 0;

       /* Check the non-wild match. */

       if (protocol == answer->protocol) {

           if (protocol != IPPROTO_IP)

              break;

       } else {

           /* Check for the two wild cases. */

           /* Caller asked for "any" protocol: adopt this entry's protocol. */
           if (IPPROTO_IP == protocol) {

              protocol = answer->protocol;

              break;

           }

           /* The entry itself is a wildcard: it accepts any protocol. */
           if (IPPROTO_IP == answer->protocol)

              break;

       }

       err = -EPROTONOSUPPORT;

    }

 

   

    …

 

    // answer now points to the matching inet_protosw entry from inetsw[sock->type]

    // Install that entry's ops function table as sock->ops

    sock->ops = answer->ops;

    answer_prot = answer->prot;

    answer_no_check = answer->no_check;

    answer_flags = answer->flags;

    rcu_read_unlock();

 

    // Allocate a sock structure, passing in inet_protosw->prot

    sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);

    if (sk == NULL)

       goto out;

 

    … 略, sk进行一些检查和初始化

 

    // Initialise sk's send, receive and error queues and bind the callbacks for sk state change, data ready, etc.

sock_init_data(sock, sk);

 

    if (sk->sk_prot->init) {

       err = sk->sk_prot->init(sk);// protocol-specific further initialisation of sk

// (tcp_v4_init_sock for TCP, see tcp_prot.init)

       if (err)

           sk_common_release(sk);

    }

}

3.    bind()函数

#include <sys/socket.h>

int bind(int sockfd, const struct sockaddr *serveraddr, socklen_t len)

返回0——成功, -1——出错

3.1  bind系统调用

/*
 * bind() system call: look up the socket behind fd, copy the user-supplied
 * address into kernel space, run the security hook and then call the
 * socket's bind op. Returns the op's result, or the error from whichever
 * earlier step failed.
 */
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)

{

    struct socket *sock;

    struct sockaddr_storage address;

    int err, fput_needed;

 

    // Find the socket from the socket file descriptor

    sock = sockfd_lookup_light(fd, &err, &fput_needed);

    if (sock) {

       // Copy the address information from user space into kernel space

       err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);

       if (err >= 0) {

           err = security_socket_bind(sock,

                        (struct sockaddr *)&address,

                        addrlen);

           if (!err)

              // Call the bind function registered on the socket (inet_bind for inet_stream_ops)

              err = sock->ops->bind(sock,

                           (struct sockaddr *)

                           &address, addrlen);

       }

       /* Balance the reference taken by sockfd_lookup_light(). */
       fput_light(sock->file, fput_needed);

    }

    return err;

}

 

// If the protocol registered on sk provides its own bind function, call it;

// otherwise validate the requested address and port and bind them to sk.
//
// Returns 0 on success or a negative errno:
//   -EINVAL         address too short, or socket not closed / already bound
//   -EADDRNOTAVAIL  address not acceptable for binding
//   -EACCES         port below PROT_SOCK without CAP_NET_BIND_SERVICE
//   -EADDRINUSE     the protocol's get_port() refused the port

int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)

{

    struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;

    struct sock *sk = sock->sk;

    struct inet_sock *inet = inet_sk(sk);

    unsigned short snum;

    int chk_addr_ret;

    int err;

 

    /* If the socket has its own bind function then use it. (RAW) */

    if (sk->sk_prot->bind) {

       // The protocol supplies its own bind function: delegate to it

       err = sk->sk_prot->bind(sk, uaddr, addr_len);

       goto out;

    }

    err = -EINVAL;

    if (addr_len < sizeof(struct sockaddr_in))

       goto out;

 

    /* Classify the requested address (local, multicast, broadcast, ...). */
    chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);

 

    /* Not specified by any standard per-se, however it breaks too

     * many applications when removed.  It is unfortunate since

     * allowing applications to make a non-local bind solves

     * several problems with systems using dynamic addressing.

     * (ie. your servers still start up even if your ISDN link

     *  is temporarily down)

     */

    /*
     * Reject the address unless non-local binds are allowed (sysctl,
     * freebind or transparent), it is INADDR_ANY, or it classifies as
     * local, multicast or broadcast.
     */
    err = -EADDRNOTAVAIL;

    if (!sysctl_ip_nonlocal_bind &&

        !(inet->freebind || inet->transparent) &&

        addr->sin_addr.s_addr != htonl(INADDR_ANY) &&

        chk_addr_ret != RTN_LOCAL &&

        chk_addr_ret != RTN_MULTICAST &&

        chk_addr_ret != RTN_BROADCAST)

       goto out;

 

    /* Port in host byte order; non-zero ports below PROT_SOCK need the capability. */
    snum = ntohs(addr->sin_port);

    err = -EACCES;

    if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))

       goto out;

 

    /*      We keep a pair of addresses. rcv_saddr is the one

     *      used by hash lookups, and saddr is used for transmit.

     *

     *      In the BSD API these are the same except where it

     *      would be illegal to use them (multicast/broadcast) in

     *      which case the sending device address is used.

     */

    lock_sock(sk);

 

    /* Check these errors (active socket, double bind). */

    err = -EINVAL;

    if (sk->sk_state != TCP_CLOSE || inet->inet_num)

       goto out_release_sock;

 

    inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;

    if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)

       inet->inet_saddr = 0;  /* Use device */

 

    /* Make sure we are allowed to bind here. */

    /* A non-zero return means failure: undo the address assignment above. */
    if (sk->sk_prot->get_port(sk, snum)) {

       inet->inet_saddr = inet->inet_rcv_saddr = 0;

       err = -EADDRINUSE;

       goto out_release_sock;

    }

 

    /* Record which parts of the binding the caller chose explicitly. */
    if (inet->inet_rcv_saddr)

       sk->sk_userlocks |= SOCK_BINDADDR_LOCK;

    if (snum)

       sk->sk_userlocks |= SOCK_BINDPORT_LOCK;

    /* Source port is taken from inet_num, not from snum (which may be 0). */
    inet->inet_sport = htons(inet->inet_num);

    /* A freshly bound socket has no peer yet. */
    inet->inet_daddr = 0;

    inet->inet_dport = 0;

    sk_dst_reset(sk);

    err = 0;

out_release_sock:

    release_sock(sk);

out:

    return err;

}

4.    listen()函数

#include <sys/socket.h>

int listen(int sockfd, int backlog)

返回0——成功, -1——失败

listen()函数仅由TCP服务器调用,它做两件事情:

(1)    socket函数创建一个套接口时,默认为一个主动套接口。listen函数把一个未连接的套接口转换为一个被动套接口,并指示内核应该接受指向该套接口的连接请求。同时,将tcp的状态由CLOSED变更为LISTEN状态。

(2)    指定内核应该为相应套接口排队的最大连接个数(由backlog指定)。

4.1  listen()系统调用

/*
 * listen() system call: look up the socket behind fd, clamp backlog to the
 * per-namespace somaxconn sysctl, run the security hook and call the
 * socket's listen op.
 */
SYSCALL_DEFINE2(listen, int, fd, int, backlog)

{

    struct socket *sock;

    int err, fput_needed;

    int somaxconn;

 

    sock = sockfd_lookup_light(fd, &err, &fput_needed);

    if (sock) {

       somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;

       /*
        * The comparison is done as unsigned, so a negative backlog
        * compares as a very large value and is clamped as well.
        */
       if ((unsigned)backlog > somaxconn)

           backlog = somaxconn;

 

       err = security_socket_listen(sock, backlog);

       if (!err)

           // Call the listen op installed when the socket was created (inet_listen for inet_stream_ops)

           err = sock->ops->listen(sock, backlog);

 

       /* Balance the reference taken by sockfd_lookup_light(). */
       fput_light(sock->file, fput_needed);

    }

    return err;

}

 

/*
 * Socket-layer listen handler for PF_INET stream sockets.
 *
 * Only an unconnected SOCK_STREAM socket whose sk is in TCP_CLOSE or
 * TCP_LISTEN state is accepted; anything else yields -EINVAL. A socket
 * that is not yet listening is started through inet_csk_listen_start().
 * In both accepted cases sk_max_ack_backlog is set to backlog.
 * Returns 0 on success or a negative errno.
 */
int inet_listen(struct socket *sock, int backlog)

{

    struct sock *sk = sock->sk;

    unsigned char old_state;

    int err;

 

    lock_sock(sk);

 

    err = -EINVAL;

    if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)

       goto out;

 

    /* Build a one-bit mask from the state and test it against the allowed states. */
    old_state = sk->sk_state;

    if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))

       goto out;

 

    /* Really, if the socket is already in listen state

     * we can only allow the backlog to be adjusted.

     */

    if (old_state != TCP_LISTEN) {

       err = inet_csk_listen_start(sk, backlog);

       if (err)

           goto out;

    }

    sk->sk_max_ack_backlog = backlog;

    err = 0;

out:

    release_sock(sk);

    return err;

}

 

/*
 * Put sk into the listening state: allocate its accept queue, reset the
 * backlog counters, switch the state to TCP_LISTEN, re-validate the local
 * port through the protocol's get_port() and hash the socket.
 *
 * Returns 0 on success, the reqsk_queue_alloc() error if the queue cannot
 * be allocated, or -EADDRINUSE if get_port() fails (in which case the
 * state is restored to TCP_CLOSE and the queue is destroyed).
 */
int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)

{

    struct inet_sock *inet = inet_sk(sk);

    struct inet_connection_sock *icsk = inet_csk(sk);

 

    // Allocate the queue storage for the listening sock

    int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue,

 nr_table_entries);

 

    if (rc != 0)

       return rc;

 

    sk->sk_max_ack_backlog = 0;

    sk->sk_ack_backlog = 0;

    inet_csk_delack_init(sk);

 

    /* There is race window here: we announce ourselves listening,

     * but this transition is still not validated by get_port().

     * It is OK, because this socket enters to hash table only

     * after validation is complete.

     */

    sk->sk_state = TCP_LISTEN; // Set the TCP state to LISTEN

    /* get_port() returning 0 means the port was validated. */
    if (!sk->sk_prot->get_port(sk, inet->inet_num)) {

       inet->inet_sport = htons(inet->inet_num);

 

       // Reset the dst_entry (set it to NULL)

       sk_dst_reset(sk);

 

       // Add this sock to the hash table of listening socks (recorded in sk->sk_prot->h.hashinfo)

       sk->sk_prot->hash(sk);

 

       return 0;

    }

 

    /* Port validation failed: roll back the state and free the queue. */
    sk->sk_state = TCP_CLOSE;

    __reqsk_queue_destroy(&icsk->icsk_accept_queue);

    return -EADDRINUSE;

}

 

5.    accept()函数

#include <sys/socket.h>

int accept(int sockfd, struct sockaddr *cliaddr, socklen_t *addrlen)

返回非负描述字——成功, -1 ——失败

参数cliaddr和addrlen用于返回客户端地址信息。

5.1  accept()系统调用

 

/*

 *  For accept, we attempt to create a new socket, set up the link

 *  with the client, wake up the client, then return the new

 *  connected fd. We collect the address of the connector in kernel

 *  space and move it to user at the very end. This is unclean because

 *  we open the socket then return an error.

 *

 *  1003.1g adds the ability to recvmsg() to query connection pending

 *  status to recvmsg. We need to add that support in a way thats

 *  clean when we restucture accept also.

 */

 

/*
 * accept4() system call: allocate a new socket and file for the incoming
 * connection, let the listening socket's accept op fill it in, optionally
 * copy the peer address to user space, and return the new descriptor
 * (or a negative errno).
 */
SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,

       int __user *, upeer_addrlen, int, flags)

{

    struct socket *sock, *newsock;

    struct file *newfile;

    int err, len, newfd, fput_needed;

    struct sockaddr_storage address;

 

    /* Only SOCK_CLOEXEC and SOCK_NONBLOCK are accepted as flags. */
    if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))

       return -EINVAL;

 

    /* Translate SOCK_NONBLOCK to O_NONBLOCK where the two values differ. */
    if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))

       flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

 

    sock = sockfd_lookup_light(fd, &err, &fput_needed);

    if (!sock)

       goto out;

 

    err = -ENFILE;

    // Allocate a new socket

    newsock = sock_alloc();

    if (!newsock)

       goto out_put;

 

    /* The new socket inherits type and ops from the listening socket. */
    newsock->type = sock->type;

    newsock->ops = sock->ops;

 

    /*

     * We don't need try_module_get here, as the listening socket (sock)

     * has the protocol module (sock->ops->owner) held.

     */

    __module_get(newsock->ops->owner);

 

    // Create the file and the file descriptor for the new socket

    newfd = sock_alloc_file(newsock, &newfile, flags);

    if (unlikely(newfd < 0)) {

       err = newfd;

       sock_release(newsock);

       goto out_put;

    }

 

    err = security_socket_accept(sock, newsock);

    if (err)

       goto out_fd;

 

    // Call the protocol family's accept op (inet_accept for inet_stream_ops)

    /* Note: the listening file's f_flags are passed here, not the flags argument. */
    err = sock->ops->accept(sock, newsock, sock->file->f_flags);

    if (err < 0)

       goto out_fd;

 

    if (upeer_sockaddr) {

       // Fetch the client address of the new connection
       // NOTE(review): the last argument 2 presumably selects the peer address; confirm against getname

       if (newsock->ops->getname(newsock, (struct sockaddr *)&address,

                    &len, 2) < 0) {

           err = -ECONNABORTED;

           goto out_fd;

       }

       // Copy that client address back to user space

       err = move_addr_to_user((struct sockaddr *)&address,

                  len, upeer_sockaddr, upeer_addrlen);

       if (err < 0)

           goto out_fd;

    }

 

    /* File flags are not inherited via accept() unlike another OSes. */

    /* Success: publish the file under newfd and return the descriptor. */
    fd_install(newfd, newfile);

    err = newfd;

 

out_put:

    fput_light(sock->file, fput_needed);

out:

    return err;

    /*
     * Failure after the file was created: drop the file and give back the
     * reserved descriptor, then take the common exit path.
     * NOTE(review): newsock is not released explicitly here; presumably
     * dropping the file does that. Confirm.
     */
out_fd:

    fput(newfile);

    put_unused_fd(newfd);

    goto out_put;

}

 

 

/*

 *  Accept a pending connection. The TCP layer now gives BSD semantics.
 *
 *  Asks the listening sock's protocol for the next accepted sock, attaches
 *  it to newsock and marks newsock as connected. Returns 0 on success;
 *  otherwise the error reported by the protocol's accept (err starts out
 *  as -EINVAL).

 */

int inet_accept(struct socket *sock, struct socket *newsock, int flags)

{

    struct sock *sk1 = sock->sk;

    int err = -EINVAL;

    // Accept function registered by the protocol (tcp_prot sets .accept = inet_csk_accept)

    struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err);

 

    if (!sk2)

        goto do_err;

 

    lock_sock(sk2);

 

    /* Warn if the accepted sock is in none of these three states. */
    WARN_ON(!((1 << sk2->sk_state) &

         (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)));

 

    // Attach the sock returned by the protocol's accept to the new socket

    sock_graft(sk2, newsock);

 

    newsock->state = SS_CONNECTED; // mark the socket as CONNECTED

    err = 0;

    release_sock(sk2);

do_err:

      return err;            

}

 

 

/*

 * This will accept the next outstanding connection.
 *
 * Returns the accepted sock, or NULL with *err set to a negative errno:
 * -EINVAL if sk is not listening, -EAGAIN if the queue is empty and the
 * receive timeout is zero, or the error from inet_csk_wait_for_connect().
 * *err is only written on the failure path.

 */

struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)

{

    struct inet_connection_sock *icsk = inet_csk(sk);

    struct sock *newsk;

    int error;

 

    lock_sock(sk);

 

    /* We need to make sure that this socket is listening,

     * and that it has something pending.

     */

    error = -EINVAL;

    if (sk->sk_state != TCP_LISTEN)

       goto out_err;

 

    /* Find already established connection */

    if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {

       /* Timeout to wait for; a zero timeout means do not wait at all. */
       long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);

 

       /* If this is a non blocking socket don't sleep */

       error = -EAGAIN;

       if (!timeo)

           goto out_err;

 

       error = inet_csk_wait_for_connect(sk, timeo);

       if (error)

           goto out_err;

    }

 

    // Take the connection at the head of the queue (three-way handshake already completed) and remove it from the list

    newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);

    WARN_ON(newsk->sk_state == TCP_SYN_RECV);

out:

    release_sock(sk);

    return newsk;

out_err:

    newsk = NULL;

    *err = error;

    goto out;

}

6.    connect()函数

#include <sys/socket.h>

int connect(int sockfd, const struct sockaddr *servaddr, socklen_t addrlen)

返回0——成功, -1——失败

6.1  connect()系统调用

/*

 *  Attempt to connect to a socket with the server address.  The address

 *  is in user space so we verify it is OK and move it to kernel space.

 *

 *  For 1003.1g we need to add clean support for a bind to AF_UNSPEC to

 *  break bindings

 *

 *  NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and

 *  other SEQPACKET protocols that take time to connect() as it doesn't

 *  include the -EINPROGRESS status for such sockets.

 */

/*
 * connect() system call: look up the socket behind fd, copy the server
 * address into kernel space, run the security hook and call the socket's
 * connect op with the file's f_flags.
 */
SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,

       int, addrlen)

{

    struct socket *sock;

    struct sockaddr_storage address;

    int err, fput_needed;

 

    sock = sockfd_lookup_light(fd, &err, &fput_needed);

    if (!sock)

       goto out;

    /* Copy the destination address from user space. */
    err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);

    if (err < 0)

       goto out_put;

 

    err =

        security_socket_connect(sock, (struct sockaddr *)&address, addrlen);

    if (err)

       goto out_put;

    // Connect function registered on the socket (inet_stream_connect for inet_stream_ops)

    err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,

               sock->file->f_flags);

    /* Balance the reference taken by sockfd_lookup_light(). */
out_put:

    fput_light(sock->file, fput_needed);

out:

    return err;

}

 

inet_stream_connect最终会调用协议注册的connect函数(sk->sk_prot->connect),对于TCP即tcp_v4_connect:

/* This will initiate an outgoing connection. */
/*
 * Steps, in order: validate the address, find a route, fill in the source
 * and destination addresses, move to SYN-SENT, select a source port and
 * hash the socket, choose the initial sequence number, and call
 * tcp_connect(). Returns 0 on success or a negative errno. On failure
 * after the state change, the socket is put back to TCP_CLOSE.
 */

int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)

{

    struct inet_sock *inet = inet_sk(sk);

    struct tcp_sock *tp = tcp_sk(sk);

    struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;

    struct rtable *rt;

    __be32 daddr, nexthop;

    int tmp;

    int err;

 

    /* Basic validation of the user-supplied address. */
    if (addr_len < sizeof(struct sockaddr_in))

       return -EINVAL;

 

    if (usin->sin_family != AF_INET)

       return -EAFNOSUPPORT;

 

    /*
     * By default route towards the destination itself; with the srr
     * option set, route towards the option's faddr instead.
     */
    nexthop = daddr = usin->sin_addr.s_addr;

    if (inet->opt && inet->opt->srr) {

       if (!daddr)

           return -EINVAL;

       nexthop = inet->opt->faddr;

    }

 

/* Call ip_route_connect() to find a suitable route and store it in rt.
 * NOTE(review): the original article states that ip_route_connect looks up
 * twice (first the next-hop IP address via the route cache or FIB, then the
 * concrete next-hop neighbour via neigh_table). This excerpt does not show
 * that; confirm against ip_route_connect itself. */

    tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,

                  RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,

                  IPPROTO_TCP,

                  inet->inet_sport, usin->sin_port, sk, 1);

    if (tmp < 0) {

       if (tmp == -ENETUNREACH)

           IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);

       return tmp;

    }

 

    /* A route flagged multicast or broadcast is rejected. */
    if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {

       ip_rt_put(rt);

       return -ENETUNREACH;

    }

 

    /* Without the srr option, take the destination from the route. */
    if (!inet->opt || !inet->opt->srr)

       daddr = rt->rt_dst;

 

    /* No source address set yet: take the one chosen by the route. */
    if (!inet->inet_saddr)

       inet->inet_saddr = rt->rt_src;

    inet->inet_rcv_saddr = inet->inet_saddr;

 

    if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {

       /* Reset inherited state */

       tp->rx_opt.ts_recent    = 0;

       tp->rx_opt.ts_recent_stamp = 0;

       tp->write_seq        = 0;

    }

 

    if (tcp_death_row.sysctl_tw_recycle &&

        !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {

       struct inet_peer *peer = rt_get_peer(rt);

       /*

        * VJ's idea. We save last timestamp seen from

        * the destination in peer table, when entering state

        * TIME-WAIT * and initialize rx_opt.ts_recent from it,

        * when trying new connection.

        */

       if (peer) {

           inet_peer_refcheck(peer);

           if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {

              tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;

              tp->rx_opt.ts_recent = peer->tcp_ts;

           }

       }

    }

 

    /* Record the peer's port and address on the socket. */
    inet->inet_dport = usin->sin_port;

    inet->inet_daddr = daddr;

 

    /* Extension header length is the IP options length, or 0 without options. */
    inet_csk(sk)->icsk_ext_hdr_len = 0;

    if (inet->opt)

       inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;

 

    tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

 

    /* Socket identity is still unknown (sport may be zero).

     * However we set state to SYN-SENT and not releasing socket

     * lock select source port, enter ourselves into the hash tables and

     * complete initialization after this.

     */

    tcp_set_state(sk, TCP_SYN_SENT);

    err = inet_hash_connect(&tcp_death_row, sk);

    if (err)

       goto failure;

 

    /* Re-evaluate the route with the source and destination ports now set. */
    err = ip_route_newports(&rt, IPPROTO_TCP,

              inet->inet_sport, inet->inet_dport, sk);

    if (err)

       goto failure;

 

    /* OK, now commit destination to socket.  */

    sk->sk_gso_type = SKB_GSO_TCPV4;

    sk_setup_caps(sk, &rt->dst);

 

    /* Pick an initial sequence number unless one is already set. */
    if (!tp->write_seq)

       tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,

                            inet->inet_daddr,

                            inet->inet_sport,

                            usin->sin_port);

 

    inet->inet_id = tp->write_seq ^ jiffies;

 

    err = tcp_connect(sk);

    /*
     * rt is cleared before the error check, so the failure path below calls
     * ip_rt_put() with NULL in this case.
     * NOTE(review): presumably the route reference now belongs to the socket
     * via sk_setup_caps(). Confirm.
     */
    rt = NULL;

    if (err)

       goto failure;

 

    return 0;

 

failure:

    /*

     * This unhashes the socket and releases the local port,

     * if necessary.

     */

    tcp_set_state(sk, TCP_CLOSE);

    ip_rt_put(rt);

    sk->sk_route_caps = 0;

    inet->inet_dport = 0;

    return err;

}

阅读(1996) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~