上篇介绍到了pf->create函数,这个函数调用完成后socket的创建就算结束了。不同协议簇下pf->create的实现是不同的,这里暂时选择AF_INET协议簇进行分析,其他再补充。
先看一下pf->create对应了哪个函数,以及是如何对应上的。
-
static int __init inet_init(void)
-
{
-
......
-
(void)sock_register(&inet_family_ops);
-
-
for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
-
inet_register_protosw(q);
-
......
-
}
inet_init是网络模块初始化时调用的初始化函数,这个函数完成的任务不少,暂时我们先关注上面两个部分,其余部分后续还会再回头来看。
-
static struct net_proto_family inet_family_ops = {
-
.family = PF_INET,
-
.create = inet_create,
-
.owner = THIS_MODULE,
-
};
-
int sock_register(const struct net_proto_family *ops)
-
{
-
......
-
spin_lock(&net_family_lock);
-
if (net_families[ops->family])
-
err = -EEXIST;
-
else {
-
net_families[ops->family] = ops;
-
err = 0;
-
}
-
......
-
}
sock_register会把inet_family_ops注册到net_families数组中,以协议簇编号(ops->family)作为数组下标。回想一下,__sock_create中是按照如下方式调用的:
-
pf = rcu_dereference(net_families[family]);
-
err = pf->create(net, sock, protocol);
先根据用户指定的family在net_families中索引对应的net_proto_family的结构,AF_INET对应的是inet_family_ops,所以pf->create对应了inet_create函数。
下面看一下inet_create函数的实现
-
static int inet_create(struct net *net, struct socket *sock, int protocol)
-
{
-
......
-
sock->state = SS_UNCONNECTED;
-
......
-
list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
-
......
-
}
-
......
-
sock->ops = answer->ops;
-
answer_prot = answer->prot;
-
answer_no_check = answer->no_check;
-
answer_flags = answer->flags;
-
......
-
sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);
-
if (sk == NULL)
-
goto out;
-
-
err = 0;
-
sk->sk_no_check = answer_no_check;
-
if (INET_PROTOSW_REUSE & answer_flags)
-
sk->sk_reuse = 1;
-
-
inet = inet_sk(sk);
-
inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
-
-
if (SOCK_RAW == sock->type) {
-
inet->num = protocol;
-
if (IPPROTO_RAW == protocol)
-
inet->hdrincl = 1;
-
}
-
-
if (ipv4_config.no_pmtu_disc)
-
inet->pmtudisc = IP_PMTUDISC_DONT;
-
else
-
inet->pmtudisc = IP_PMTUDISC_WANT;
-
-
inet->id = 0;
-
-
sock_init_data(sock, sk);
-
-
sk->sk_destruct = inet_sock_destruct;
-
sk->sk_protocol = protocol;
-
sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
-
-
inet->uc_ttl = -1;
-
inet->mc_loop = 1;
-
inet->mc_ttl = 1;
-
inet->mc_all = 1;
-
inet->mc_index = 0;
-
inet->mc_list = NULL;
-
-
sk_refcnt_debug_inc(sk);
-
-
if (inet->num) {
-
/* It assumes that any protocol which allows
-
* the user to assign a number at socket
-
* creation time automatically
-
* shares.
-
*/
-
inet->sport = htons(inet->num);
-
/* Add to protocol hash chains. */
-
sk->sk_prot->hash(sk);
-
}
-
-
if (sk->sk_prot->init) {
-
err = sk->sk_prot->init(sk);
-
if (err)
-
sk_common_release(sk);
-
}
-
}
首先将sock->state设置为未连接状态SS_UNCONNECTED,然后根据用户指定的type(即sock->type)在inetsw中查找对应的inet_protosw结构。
这里需要先确认一下inetsw里面对应了哪些内容,以及是如何注册上的。现在可以回头看看inet_init了(本文最上面的那段代码):先遍历inetsw_array数组,然后对每个元素调用inet_register_protosw函数进行注册。看看inetsw_array存储了什么。
-
static struct inet_protosw inetsw_array[] =
-
{
-
{
-
.type = SOCK_STREAM,
-
.protocol = IPPROTO_TCP,
-
.prot = &tcp_prot,
-
.ops = &inet_stream_ops,
-
.capability = -1,
-
.no_check = 0,
-
.flags = INET_PROTOSW_PERMANENT |
-
INET_PROTOSW_ICSK,
-
},
-
-
{
-
.type = SOCK_DGRAM,
-
.protocol = IPPROTO_UDP,
-
.prot = &udp_prot,
-
.ops = &inet_dgram_ops,
-
.capability = -1,
-
.no_check = UDP_CSUM_DEFAULT,
-
.flags = INET_PROTOSW_PERMANENT,
-
},
-
-
-
{
-
.type = SOCK_RAW,
-
.protocol = IPPROTO_IP, /* wild card */
-
.prot = &raw_prot,
-
.ops = &inet_sockraw_ops,
-
.capability = CAP_NET_RAW,
-
.no_check = UDP_CSUM_DEFAULT,
-
.flags = INET_PROTOSW_REUSE,
-
}
-
};
对应了常用的三种类型:(SOCK_STREAM/IPPROTO_TCP)、(SOCK_DGRAM/IPPROTO_UDP)、(SOCK_RAW/IPPROTO_IP),用户通过type指定。上面的遍历就比较明显了:分别把这三种type进行注册。下面看看注册到哪里去了。
-
void inet_register_protosw(struct inet_protosw *p)
-
{
-
......
-
answer = NULL;
-
last_perm = &inetsw[p->type];
-
list_for_each(lh, &inetsw[p->type]) {
-
answer = list_entry(lh, struct inet_protosw, list);
-
-
/* Check only the non-wild match. */
-
if (INET_PROTOSW_PERMANENT & answer->flags) {
-
if (protocol == answer->protocol)
-
break;
-
last_perm = lh;
-
}
-
answer = NULL;
-
}
-
}
很明显,注册到了inetsw中,现在可以回头继续分析inet_create函数了,answer实际对应了TCP/UDP/RAW的inet_protosw结构,进行一系列赋值,然后调用sk_alloc函数。
-
struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot)
-
{
-
struct sock *sk;
-
-
sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
-
if (sk) {
-
sk->sk_family = family;
-
sk->sk_prot = sk->sk_prot_creator = prot;
-
sock_lock_init(sk);
-
sock_net_set(sk, get_net(net));
-
atomic_set(&sk->sk_wmem_alloc, 1);
-
}
-
return sk;
-
}
sk_prot_alloc实际上是在prot对应的slab中或者直接kmalloc申请一个sock结构,然后设置协议簇,以及proto结构信息。这里需要注意一下,目前返回的是一个struct sock的结构,而实际上并不仅仅是这个简单的结构。我们先回到inet_create函数中继续向下分析。可以看到申请完sock之后,inet_create函数中使用了inet = inet_sk(sk)对sock进行了强制转换,然后对转换得到的inet结构进行一系列赋值。而inet_sock是INET域专用的一个sock表示,在struct sock上扩展而来,除基本sock属性,提供了INET域专用的属性,如TTL、IP地址、端口等。这里是怎么对应上的呢。还得再回头研究一下,申请sock的最终实现,申请sock时最终走的是下面的函数,前面简单提到过,但没深入。
-
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,int family)
-
{
-
struct sock *sk;
-
struct kmem_cache *slab;
-
-
slab = prot->slab;
-
if (slab != NULL) {
-
.......
-
else
-
sk = kmalloc(prot->obj_size, priority);
在prot没有对应slab的情况下,sock是通过kmalloc申请的,申请的大小由prot->obj_size指定,而prot->obj_size来自上面static struct inet_protosw inetsw_array[]数组各成员的.prot字段所指向的proto结构,这个proto结构区分了TCP/UDP/RAW协议,见上面数组定义。其中tcp_prot的.obj_size = sizeof(struct tcp_sock),
udp_prot的.obj_size = sizeof(struct udp_sock),raw_prot的.obj_size = sizeof(struct raw_sock)。再看一下这几个结构的具体定义:
-
struct tcp_sock {
-
/* inet_connection_sock has to be the first member of tcp_sock */
-
struct inet_connection_sock inet_conn;
-
u16 tcp_header_len; /* Bytes of tcp header to send */
-
u16 xmit_size_goal_segs; /* Goal for segmenting output packets */
-
-
/*
-
* Header prediction flags
-
* 0x5?10 << 16 + snd_wnd in net byte order
-
*/
-
__be32 pred_flags;
-
........
-
struct udp_sock {
-
/* inet_sock has to be the first member */
-
struct inet_sock inet;
-
int pending; /* Any pending frames ? */
-
unsigned int corkflag; /* Cork is required */
-
__u16 encap_type; /* Is this an Encapsulation socket? */
-
/*
-
......
-
struct raw_sock {
-
/* inet_sock has to be the first member */
-
struct inet_sock inet;
-
struct icmp_filter filter;
-
};
除了tcp_sock的第一个成员是inet_connection_sock外,udp_sock和raw_sock的第一个成员都是inet_sock。我们再看一下inet_connection_sock和inet_sock的定义:
-
struct inet_connection_sock {
-
/* inet_sock has to be the first */
-
struct inet_sock icsk_inet;
-
struct request_sock_queue icsk_accept_queue;
-
struct inet_bind_bucket *icsk_bind_hash;
-
unsigned long icsk_timeout;
-
......
-
-
struct inet_sock {
-
/* sk and pinet6 has to be the first two members of inet_sock */
-
struct sock sk;
-
......
inet_connection_sock第一个成员也是inet_sock,而inet_sock的第一个成员是struct sock。这样就比较清晰了,可以这么理解:struct sock是一个基类,是Linux socket最基础的面向协议栈的结构;而inet_sock在继承sock的基础上进行了扩展,除了基本socket属性外提供了INET域专用的属性,如TTL等。简单来说,udp_sock和raw_sock在inet_sock的基础上进行了私有扩展。inet_connection_sock在继承inet_sock的基础上,扩展了所有面向连接相关的一些协议信息,而tcp_sock则在inet_connection_sock基础上进一步扩展,增加了TCP的一些私有属性,比如滑动窗口,拥塞控制等。由于struct sock始终位于这些结构的起始位置,并且申请内存时是按prot->obj_size(即tcp_sock/udp_sock/raw_sock的大小)分配的,所以指向struct sock的指针可以直接当作指向inet_sock的指针来使用。至此,上述inet_create函数中inet = inet_sk(sk);的转换就没有任何疑问了。
回到inet_create,除了对inet_sock结构的赋值外,也对struct sock结构进行了一系列的初始化,主要涉及收发包队列/缓冲区等。最后调用具体协议类型的init函数:对TCP而言对应的是tcp_v4_init_sock函数,而udp没有定义init,raw的init函数仅仅是把filter字段给清0了。
阅读(3800) | 评论(0) | 转发(0) |