Linux Netlink通信机制详解（下）-frankzfz-ChinaUnix博客

frankzfz

首页　| 　博文目录　| 　关于我

frankzfz

博客访问： 4587487
博文数量： 252
博客积分： 5347
博客等级：大校
技术积分： 13838
用户组：普通用户
注册时间： 2009-09-30 10:13

文章分类

全部博文（252）

bpf笔记（7）
LVS（4）
docker学习（21）
TCP/IP（14）
python（3）
转载好文（4）
Linux通用内核（21）
面试题（24）
ZigBee 技术学习（54）
单片机（2）
Linux驱动程序学（4）
嵌入式调试技术（3）
根文件系统（2）
Linux内核的移植（3）
基础学习（17）
网络学习（17）
ARM学习（7）
Linux操作系统应（11）
Linux编程学习（20）
Bootloader（12）
未分配的博文（2）

文章存档

2022年（12）

2017年（11）

2016年（7）

2015年（14）

2014年（20）

2012年（9）

2011年（20）

2010年（153）

2009年（6）

我的朋友

相关博文

Linux Netlink通信机制详解（下）

分类： LINUX

2011-10-25 19:48:48

这里我以路由中的netlink为例，看一下内核中的处理流程是怎么样的！在/kernel/net/core/rtnetlink.c文件中，有一个接收从用户空间过来的Netlink消息的函数。

/*
 * Receive callback registered for the rtnetlink kernel socket.
 *
 * Drains the socket's receive queue through netlink_run_queue(), which
 * invokes rtnetlink_rcv_msg() for each message, and repeats for as long
 * as netlink_run_queue() leaves a non-zero pending count behind.
 *
 * @sk:  the rtnetlink socket whose queue is processed
 * @len: not used by this function
 */
static void rtnetlink_rcv(struct sock *sk, int len)
{
	unsigned int pending = 0;

	for (;;) {
		rtnl_lock();
		netlink_run_queue(sk, &pending, &rtnetlink_rcv_msg);
		/* presumably releases the semaphore taken by rtnl_lock() -- confirm */
		up(&rtnl_sem);

		netdev_run_todo();

		if (!pending)
			break;
	}
}

上面的内核函数就是用来接收用户空间发来的路由相关Netlink消息的：当我们使用route命令添加一条路由时，就会调用该函数进行接收。该函数是在netlink初始化时注册的，注册代码同样位于rtnetlink.c文件中。

/*
 * One-time initialisation of rtnetlink.
 *
 * - computes rtattr_max as the largest entry of rta_max[] and allocates
 *   rta_buf with room for that many attribute pointers;
 * - creates the NETLINK_ROUTE kernel socket with rtnetlink_rcv() as its
 *   receive function;
 * - registers the net-device notifier and installs link_rtnetlink_table
 *   for PF_UNSPEC and PF_PACKET.
 *
 * Panics if either the buffer or the socket cannot be created.
 */
void __init rtnetlink_init(void)
{
	int idx;

	rtattr_max = 0;
	for (idx = 0; idx < ARRAY_SIZE(rta_max); idx++) {
		if (rtattr_max < rta_max[idx])
			rtattr_max = rta_max[idx];
	}

	rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL);
	if (rta_buf == NULL)
		panic("rtnetlink_init: cannot allocate rta_buf\n");

	/*
	 * Creating the kernel-side netlink socket also registers
	 * rtnetlink_rcv() as the receive function for route netlink.
	 */
	rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX,
				     rtnetlink_rcv, THIS_MODULE);
	if (!rtnl)
		panic("rtnetlink_init: cannot initialize rtnetlink\n");

	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
	register_netdevice_notifier(&rtnetlink_dev_notifier);

	rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
	rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
}

在netlink_kernel_create函数中，可以看到内核是如何设置用于接收用户空间消息的接收函数的：

/*
 * Create a kernel-side netlink socket for protocol number @unit.
 *
 * @unit:   netlink protocol index; must be in [0, MAX_LINKS)
 * @groups: number of multicast groups; values below 32 are raised to 32
 * @input:  optional receive function stored in the netlink_sock's
 *          data_ready member (rtnetlink_rcv() in the rtnetlink case)
 * @module: owning module recorded in nl_table[unit]
 *
 * Returns the new struct sock on success, or NULL if nl_table is not set
 * up, @unit is out of range, or any creation/insertion step fails.  Once
 * the socket has been created, failures release it via sock_release().
 */
struct sock *
netlink_kernel_create(int unit, unsigned int groups,
		      void (*input)(struct sock *sk, int len),
		      struct module *module)
{
	struct socket *sock;
	struct sock *sk;
	struct netlink_sock *nlk;

	if (!nl_table)
		return NULL;

	if (unit<0 || unit>=MAX_LINKS)
		return NULL;

	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
		return NULL;

	if (__netlink_create(sock, unit) < 0)
		goto out_sock_release;

	sk = sock->sk;
	sk->sk_data_ready = netlink_data_ready;
	/* Install the kernel-side receive function for this protocol. */
	if (input)
		nlk_sk(sk)->data_ready = input;

	if (netlink_insert(sk, 0))
		goto out_sock_release;

	/* Fetch the netlink_sock that corresponds to this sock. */
	nlk = nlk_sk(sk);
	nlk->flags |= NETLINK_KERNEL_SOCKET;

	/*
	 * Update the netlink_table entry for this protocol; each protocol
	 * has one netlink_table slot, indexed by unit.
	 */
	netlink_table_grab();
	nl_table[unit].groups = groups < 32 ? 32 : groups;
	nl_table[unit].module = module;
	nl_table[unit].registered = 1;
	netlink_table_ungrab();

	return sk;

out_sock_release:
	sock_release(sock);
	return NULL;
}

到此，从内核创建netlink套接字到接收用户空间发送过来的消息，整个流程就清晰了。那么当我们添加一条新路由时，接收函数rtnetlink_rcv会在循环中调用netlink_run_queue，后者从接收队列中逐个取出消息，并调用实际的接收处理函数，这里为rtnetlink_rcv_msg函数。

/**
 * netlink_run_queue - Process netlink receive queue.
 * @sk: Netlink socket containing the queue
 * @qlen: Place to store queue length upon entry
 * @cb: Callback function invoked for each netlink message found
 *
 * Handles at most as many buffers as were queued upon entry, passing
 * each one to netlink_rcv_skb() together with @cb.  The callback may
 * refuse a message by returning a negative error code while setting
 * the error pointer to 0; in that case this function returns with
 * *qlen != 0.
 *
 * *qlen must be 0 before the first call; afterwards the function may
 * be called repeatedly until *qlen reaches 0.
 */
void netlink_run_queue(struct sock *sk, unsigned int *qlen,
		       int (*cb)(struct sk_buff *, struct nlmsghdr *, int *))
{
	struct sk_buff *skb;

	/* First call, or the queue shrank: resync with the current length. */
	if (*qlen == 0 || skb_queue_len(&sk->sk_receive_queue) < *qlen)
		*qlen = skb_queue_len(&sk->sk_receive_queue);

	while (*qlen) {
		skb = skb_dequeue(&sk->sk_receive_queue);

		if (netlink_rcv_skb(skb, cb) == 0) {
			/* Buffer fully handled: drop it and move on. */
			kfree_skb(skb);
			(*qlen)--;
			continue;
		}

		/*
		 * Processing stopped early.  A buffer that still holds data
		 * goes back to the head of the queue; an empty one is freed
		 * and counted as done.
		 */
		if (skb->len == 0) {
			kfree_skb(skb);
			(*qlen)--;
		} else {
			skb_queue_head(&sk->sk_receive_queue, skb);
		}
		break;
	}
}

下面是rtnetlink_rcv_msg()函数的实现，对netlink消息进行相应的处理。其中有一个数据结构

struct rtnetlink_link *link; 其定义如下，其中包含两个不同的处理函数指针（doit和dumpit）：

/*
 * Handler pair for one rtnetlink message type.  Tables of these are
 * indexed by (message type - RTM_BASE); either member may be NULL when
 * the corresponding operation is not provided.
 */
struct rtnetlink_link
{
/* Handles a single request; called with the skb, its netlink header and the parsed attribute array. */
int (*doit)(struct sk_buff *, struct nlmsghdr*, void *attr);
/* Dump handler; handed to netlink_dump_start() for NLM_F_DUMP requests. */
int (*dumpit)(struct sk_buff *, struct netlink_callback *cb);
};
/*
 * Process one rtnetlink message.
 *
 * @skb:  buffer the message was received in
 * @nlh:  netlink header of the message to process
 * @errp: receives the error code associated with a failure
 *
 * Return value and *errp:
 *  - 0 when the message is ignored (not a request, a control message,
 *    or too short to carry a struct rtgenmsg);
 *  - -1 with *errp set to -EINVAL, -EAFNOSUPPORT or -EPERM on validation
 *    or permission failures;
 *  - for dump requests: -1 with *errp holding netlink_dump_start()'s
 *    result (0 when the dump was started and the message skipped);
 *  - otherwise the doit handler's return value, also stored in *errp.
 *
 * Attributes are parsed into the shared rta_buf array, which is cleared
 * on every non-dump request before parsing.
 */
static __inline__ int
rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
{
struct rtnetlink_link *link;
struct rtnetlink_link *link_tab;
int sz_idx, kind;
int min_len;
int family;
int type;
int err;
/* Only requests are handled by kernel now */
if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
return 0;
type = nlh->nlmsg_type;
/* A control message: ignore them */
if (type < RTM_BASE)
return 0;
/* Unknown message: reply with EINVAL */
if (type > RTM_MAX)
goto err_inval;
/* From here on, type is a zero-based index into the handler tables. */
type -= RTM_BASE;
/* All the messages must have at least 1 byte length */
if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
return 0;
family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
if (family >= NPROTO) {
*errp = -EAFNOSUPPORT;
return -1;
}
/*
 * Select the handler table registered for the family carried in the
 * message; fall back to the PF_UNSPEC table if none is registered.
 */
link_tab = rtnetlink_links[family];
if (link_tab == NULL)
link_tab = rtnetlink_links[PF_UNSPEC];
/*
 * Pick the handler entry for this message type; when adding a route
 * this is the RTM_NEWROUTE slot.
 */
link = &link_tab[type];
/* sz_idx indexes rtm_min[] / rta_max[]; kind is the low two bits of the type. */
sz_idx = type>>2;
kind = type&3;
/* NOTE(review): kind 2 is presumably the GET variant of each message group -- confirm against rtnetlink.h. */
if (kind != 2 && security_netlink_recv(skb)) {
*errp = -EPERM;
return -1;
}
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
/* Fall back to the PF_UNSPEC dump handler if the family has none. */
if (link->dumpit == NULL)
link = &(rtnetlink_links[PF_UNSPEC][type]);
if (link->dumpit == NULL)
goto err_inval;
if ((*errp = netlink_dump_start(rtnl, skb, nlh,
link->dumpit, NULL)) != 0) {
return -1;
}
/* Dump started (*errp == 0): skip this message and return -1. */
netlink_queue_skip(nlh, skb);
return -1;
}
/* Reset the shared attribute pointer array before parsing. */
memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
min_len = rtm_min[sz_idx];
if (nlh->nlmsg_len < min_len)
goto err_inval;
if (nlh->nlmsg_len > min_len) {
/* Attributes start after the aligned minimum-length part of the message. */
int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len);
while (RTA_OK(attr, attrlen)) {
unsigned flavor = attr->rta_type;
if (flavor) {
/* Reject attribute types beyond the maximum for this message group. */
if (flavor > rta_max[sz_idx])
goto err_inval;
/* Attribute type N is stored at index N-1. */
rta_buf[flavor-1] = attr;
}
attr = RTA_NEXT(attr, attrlen);
}
}
/* Fall back to the PF_UNSPEC doit handler if the family has none. */
if (link->doit == NULL)
link = &(rtnetlink_links[PF_UNSPEC][type]);
if (link->doit == NULL)
goto err_inval;
/*
 * Invoke the handler for this message type.  For RTM_NEWROUTE with the
 * inet6 table, that is inet6_rtm_newroute().
 */
err = link->doit(skb, nlh, (void *)&rta_buf[0]);
*errp = err;
return err;
err_inval:
*errp = -EINVAL;
return -1;
}
int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct rtmsg *r = NLMSG_DATA(nlh);
struct in6_rtmsg rtmsg;
if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
return -EINVAL;
return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
}

inet6_rtm_newroute函数是通过下面的数组注册的，所以上面的link->doit(skb, nlh, (void *)&rta_buf[0])就是根据这个数组找到并调用对应处理函数的。

/*
 * IPv6 rtnetlink handler table, indexed by (message type - RTM_BASE).
 * Entries not listed here are zero-initialised, i.e. have neither a
 * doit nor a dumpit handler.
 */
static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
	[RTM_GETLINK - RTM_BASE] = {
		.dumpit = inet6_dump_ifinfo
	},
	[RTM_NEWADDR - RTM_BASE] = {
		.doit = inet6_rtm_newaddr
	},
	[RTM_DELADDR - RTM_BASE] = {
		.doit = inet6_rtm_deladdr
	},
	[RTM_GETADDR - RTM_BASE] = {
		.dumpit = inet6_dump_ifaddr
	},
	[RTM_GETMULTICAST - RTM_BASE] = {
		.dumpit = inet6_dump_ifmcaddr
	},
	[RTM_GETANYCAST - RTM_BASE] = {
		.dumpit = inet6_dump_ifacaddr
	},
	[RTM_NEWROUTE - RTM_BASE] = {
		.doit = inet6_rtm_newroute
	},
	[RTM_DELROUTE - RTM_BASE] = {
		.doit = inet6_rtm_delroute
	},
	[RTM_GETROUTE - RTM_BASE] = {
		.doit = inet6_rtm_getroute,
		.dumpit = inet6_dump_fib
	}
};