Chinaunix首页 | 论坛 | 博客
  • 博客访问: 1321483
  • 博文数量: 107
  • 博客积分: 10155
  • 博客等级: 上将
  • 技术积分: 2166
  • 用 户 组: 普通用户
  • 注册时间: 2008-03-25 16:57
文章分类

全部博文(107)

文章存档

2010年(1)

2009年(1)

2008年(105)

分类: LINUX

2008-12-08 12:01:07

 

我们继续往下看

 

sys_socketcall()-->sys_socket()-->sock_create()-->__sock_create()-->通过pf->create()--> inet_create()

/*
 * Excerpt from inet_create(), starting at the lookup_protocol label (the
 * function header and the local declarations are outside this excerpt).
 *
 * Finds the entry on inetsw[sock->type] that matches the requested protocol,
 * allocates a struct sock for it and fills in the initial IPv4 socket state.
 *
 * Returns 0 on success; otherwise the negative errno chosen at the failing
 * step, or whatever sk->sk_prot->init() returned.
 */
lookup_protocol:
    /* Default error; replaced by -EPROTONOSUPPORT once an entry is rejected. */
    err = -ESOCKTNOSUPPORT;
    rcu_read_lock();
    /* Walk the list headed at inetsw[sock->type]. */
    list_for_each_rcu(p, &inetsw[sock->type]) {
        answer = list_entry(p, struct inet_protosw, list);

        /* Check the non-wild match. */
        if (protocol == answer->protocol) {
            /* Equal numbers only count when they are not both IPPROTO_IP. */
            if (protocol != IPPROTO_IP)
                break;
        } else {
            /* Check for the two wild cases. */
            if (IPPROTO_IP == protocol) {
                /* Caller passed IPPROTO_IP: adopt this entry's protocol. */
                protocol = answer->protocol;
                break;
            }
            /* Entry is IPPROTO_IP: it accepts the requested protocol. */
            if (IPPROTO_IP == answer->protocol)
                break;
        }
        /* No match on this entry: record the error and clear answer. */
        err = -EPROTONOSUPPORT;
        answer = NULL;
    }

    if (unlikely(answer == NULL)) {
        /*
         * Nothing matched. While try_loading_module (declared outside this
         * excerpt) is below 2, request a module and redo the lookup.
         */
        if (try_loading_module < 2) {
            /*
             * Leave the RCU read section before request_module();
             * lookup_protocol takes the lock again.
             * NOTE(review): presumably because request_module() may sleep.
             */
            rcu_read_unlock();
            /*
             * Be more specific, e.g. net-pf-2-proto-132-type-1
             * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
             */
            if (++try_loading_module == 1)
                request_module("net-pf-%d-proto-%d-type-%d",
                     PF_INET, protocol, sock->type);
            /*
             * Fall back to generic, e.g. net-pf-2-proto-132
             * (net-pf-PF_INET-proto-IPPROTO_SCTP)
             */
            else
                request_module("net-pf-%d-proto-%d",
                     PF_INET, protocol);
            goto lookup_protocol;
        } else
            goto out_rcu_unlock;
    }

    /* Entries with capability > 0 require the caller to hold that capability. */
    err = -EPERM;
    if (answer->capability > 0 && !capable(answer->capability))
        goto out_rcu_unlock;

    /* Reject the protocol if inet_netns_ok() refuses it for this namespace. */
    err = -EAFNOSUPPORT;
    if (!inet_netns_ok(net, protocol))
        goto out_rcu_unlock;

    /*
     * Copy what is needed out of the entry while still inside the RCU read
     * section; answer itself is not touched after the unlock.
     */
    sock->ops = answer->ops;
    answer_prot = answer->prot;
    answer_no_check = answer->no_check;
    answer_flags = answer->flags;
    rcu_read_unlock();

    /* Sanity check: the chosen protocol must have a slab set. */
    BUG_TRAP(answer_prot->slab != NULL);

    /* Allocate the struct sock; the RCU lock is no longer held, so use out. */
    err = -ENOBUFS;
    sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);
    if (sk == NULL)
        goto out;

    err = 0;
    /* Apply the per-entry settings saved above. */
    sk->sk_no_check = answer_no_check;
    if (INET_PROTOSW_REUSE & answer_flags)
        sk->sk_reuse = 1;

    inet = inet_sk(sk);
    inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;

    /* Raw sockets store the protocol number in num; IPPROTO_RAW sets hdrincl. */
    if (SOCK_RAW == sock->type) {
        inet->num = protocol;
        if (IPPROTO_RAW == protocol)
            inet->hdrincl = 1;
    }

    /* Initial pmtudisc mode is chosen by ipv4_config.no_pmtu_disc. */
    if (ipv4_config.no_pmtu_disc)
        inet->pmtudisc = IP_PMTUDISC_DONT;
    else
        inet->pmtudisc = IP_PMTUDISC_WANT;

    inet->id = 0;

    sock_init_data(sock, sk);

    sk->sk_destruct     = inet_sock_destruct;
    sk->sk_family     = PF_INET;
    sk->sk_protocol     = protocol;
    sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;

    /* Initial values for the uc_ and mc_ fields of the inet socket. */
    inet->uc_ttl    = -1;
    inet->mc_loop    = 1;
    inet->mc_ttl    = 1;
    inet->mc_index    = 0;
    inet->mc_list    = NULL;

    sk_refcnt_debug_inc(sk);

    /* In this excerpt num is only assigned for SOCK_RAW (see above). */
    if (inet->num) {
        /* It assumes that any protocol which allows
         * the user to assign a number at socket
         * creation time automatically
         *
         */
        inet->sport = htons(inet->num);
        /* Add to protocol hash chains. */
        sk->sk_prot->hash(sk);
    }

    /* Optional protocol-specific init; on failure the new sock is released. */
    if (sk->sk_prot->init) {
        err = sk->sk_prot->init(sk);
        if (err)
            sk_common_release(sk);
    }
out:
    return err;
/* Error exit for paths that still hold the RCU read lock. */
out_rcu_unlock:
    rcu_read_unlock();
    goto out;
}

这段代码看似复杂其实分析起来并不算难,首先上面

list_for_each_rcu(p, &inetsw[sock->type])是一个宏,我们看一下

/*
 * Iterate over a list with pos as the cursor: start at (head)->next and stop
 * once pos is back at head. Every next pointer is loaded through
 * rcu_dereference(), and pos->next is prefetched before each loop test.
 * Both pos and head are evaluated more than once.
 */
#define list_for_each_rcu(pos, head) \
    for (pos = rcu_dereference((head)->next); \
        prefetch(pos->next), pos != (head); \
        pos = rcu_dereference(pos->next))

这段宏我贴些资料供大家理解,下面这些内容出自

RCU(Read-Copy Update)通过延迟写操作来提高同步性能,具体请参见第3章。这里只分析具有RCU的链表。
RCU常用来保护读操作占多数的链表与数组。具有RCU的链表的操作函数与普通链表操作函数的区别是在函数名后加上了_rcu,如list_for_each_rcu函数。
函数list_for_each_rcu的功能是遍历一个rcu保护的链表。其中,参数pos表示用来做链表位置计数的&struct list_head结构,参数head表示链表头。只要遍历被rcu_read_lock()保护,使用诸如list_add_rcu()的函数对链表同时访问是安全的。
函数list_for_each_rcu列出如下:

/*
 * Variant of list_for_each_rcu as quoted in the referenced material: the
 * first element is read as plain (head)->next, and only the advance step goes
 * through rcu_dereference(). pos->next is prefetched after each assignment
 * to pos. Iteration stops when pos is back at head.
 */
#define list_for_each_rcu(pos, head) \
    for (pos = (head)->next, prefetch(pos->next); pos != (head); \
            pos = rcu_dereference(pos->next), prefetch(pos->next))

函数rcu_dereference在RCU读临界部分中取出一个RCU保护的指针。在需要内存屏障的体系中进行内存屏障(目前只有Alpha体系需要),函数列出如下:

/*
 * Statement expression that evaluates p once into a temporary of the same
 * type, issues smp_read_barrier_depends(), and then yields the temporary as
 * its value, so the barrier sits between the load and any use of the result.
 */
#define rcu_dereference(p) ({ \
                typeof(p) _________p1 = p; \
                smp_read_barrier_depends(); \
                (_________p1); \
                })

include/asm-i386/system.h中:

/* In this header the SMP form is a plain alias for read_barrier_depends(). */
#define smp_read_barrier_depends()    read_barrier_depends()

很明显上面的宏就是循环遍历inetsw数组中与我们socket类型对应的那条链表,那么这个数组是什么时候初始化的呢?我们再像上一节那样看一下

/*
 * Excerpt of inet_init(); the article elides the code before and after the
 * loop. The part shown hands every element of inetsw_array, in order, to
 * inet_register_protosw().
 */
static int __init inet_init(void)
{
。。。。。。
    /* q runs from the first element up to (not including) index INETSW_ARRAY_LEN. */
    for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
        inet_register_protosw(q);
。。。。。。
}

我们看到他循环用inet_register_protosw()函数处理inetsw_array数组中的元素

static struct inet_protosw inetsw_array[] =
{
    {
        .type = SOCK_STREAM,
        .protocol = IPPROTO_TCP,
        .prot = &tcp_prot,
        .ops = &inet_stream_ops,
        .capability = -1,
        .no_check = 0,
        .flags = INET_PROTOSW_PERMANENT |
             INET_PROTOSW_ICSK,
    },

    {
        .type = SOCK_DGRAM,
        .protocol = IPPROTO_UDP,
        .prot = &udp_prot,
        .ops = &inet_dgram_ops,
        .capability = -1,
        .no_check = UDP_CSUM_DEFAULT,
        .flags = INET_PROTOSW_PERMANENT,
       },


       {
     .type = SOCK_RAW,
     .protocol = IPPROTO_IP,    /* wild card */
     .prot = &raw_prot,
     .ops = &inet_sockraw_ops,
     .capability = CAP_NET_RAW,/* wumingxiaozu */
     .no_check = UDP_CSUM_DEFAULT,
     .flags = INET_PROTOSW_REUSE,
       }
};

我们已经在前边看到了inet_protosw结构的定义,所以结合这个数组我们可以看出数组定义了三种类型的socket与网络层的接口内容,一个是用于tcp数据流的,它所使用的IP协议标识码IPPROTO_TCP表示传输控制协议,而另一个是udp的数据报使用的,它所使用的IP协议标识码IPPROTO_UDP表示用户数据报协议,而第三个则是raw原始的协议使用的,可以由用户自行开发自己的ip协议,有很多IP欺骗程序都是使用的这种类型的协议,这里标识码IPPROTO_IP是“虚拟的IP协议”类型,为使我们能够理解上面函数的代码,我们再次结合应用程序的练习看一下

server_fd = socket(AF_INET, SOCK_STREAM, 0);

继续往下看之前我们先完成上面inet_init()函数注册登记inetsw的过程,他是被下面的函数登记到数组中的,篇幅太长,转下一篇继续

 

 

 

阅读(10122) | 评论(0) | 转发(5) |
给主人留下些什么吧!~~