Linux Netlink通信机制详解（下）-zhanglin496-ChinaUnix博客

mxl&nbsp;ChinaUnix博客zhanglin.blog.chinaunix.net

首页　| 　博文目录　| 　关于我

zhanglin496

博客访问： 1029196
博文数量： 442
博客积分： 1146
博客等级：少尉
技术积分： 1604
用户组：普通用户
注册时间： 2010-11-04 12:52

个人简介

123

文章分类

全部博文（442）

RCU机制（2）
tc流量控制（2）
ipv6（1）
内核启动流程（5）
openwrt（7）
bootloader（3）
http（1）
C++（3）
网络概念（0）
多线程编程（2）
常用算法（14）
linux操作（9）
内核同步（26）

内存屏障（5）
linux内核（191）

RCU机制（3）

netfilter-nat分（3）

skb解析（5）

定时器实现（1）

poll分析（4）

proc系统（5）

linux-IPC（13）

netfilter（84）

linux数据结构（17）

linux驱动（11）
git（3）
linux配置（7）
个人经验总结（6）
调试（7）
Unix C（40）
TCP/IP（26）
未分配的博文（87）

文章存档

2017年（3）

2016年（15）

2015年（132）

2014年（52）

2013年（101）

2012年（110）

2011年（29）

我的朋友

最近访客

推荐博文

Linux Netlink通信机制详解（下）

分类：

2012-04-27 22:05:01

原文地址：Linux Netlink通信机制详解（下）作者：frankzfz

这里我以路由中的netlink为例，看一下内核中的处理流程是怎么样的！在/kernel/net/core/rtnetlink.c文件中，有一个接收从用户空间过来的Netlink消息的函数。

/*
 * Receive handler of the rtnetlink kernel socket.
 *
 * Hands the socket's receive queue to netlink_run_queue() with
 * rtnetlink_rcv_msg() as the per-message callback, and keeps doing so
 * for as long as netlink_run_queue() leaves a non-zero count behind.
 * Each pass runs between rtnl_lock() and up(&rtnl_sem), followed by
 * netdev_run_todo().
 *
 * @sk:  netlink socket whose receive queue is processed
 * @len: not used by this function
 */
static void rtnetlink_rcv(struct sock *sk, int len)
{
	unsigned int pending = 0;

	for (;;) {
		rtnl_lock();
		netlink_run_queue(sk, &pending, rtnetlink_rcv_msg);
		up(&rtnl_sem);

		netdev_run_todo();

		/* Nothing left over from this pass: we are done. */
		if (pending == 0)
			break;
	}
}

上面的内核函数就是用来接收用户空间发来的路由相关Netlink消息的，当我们使用route命令添加一条路由时，就会调用该函数进行接收。该函数是在netlink初始化时注册的，注册代码同样位于rtnetlink.c文件中。

/*
 * One-time initialisation of rtnetlink.
 *
 * Sizes and allocates the shared attribute pointer table (rta_buf),
 * creates the NETLINK_ROUTE kernel socket, registers the netdevice
 * notifier and installs the default handler table for PF_UNSPEC and
 * PF_PACKET. Allocation or socket creation failure ends in panic().
 */
void __init rtnetlink_init(void)
{
	int idx;

	/* rtattr_max ends up as the largest value found in rta_max[]. */
	rtattr_max = 0;
	for (idx = 0; idx < ARRAY_SIZE(rta_max); idx++) {
		if (rtattr_max < rta_max[idx])
			rtattr_max = rta_max[idx];
	}

	/* One pointer slot per attribute type. */
	rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL);
	if (rta_buf == NULL)
		panic("rtnetlink_init: cannot allocate rta_buf\n");

	/*
	 * Creating the kernel netlink socket also registers rtnetlink_rcv
	 * as the receive function for route netlink messages.
	 */
	rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
				     THIS_MODULE);
	if (!rtnl)
		panic("rtnetlink_init: cannot initialize rtnetlink\n");

	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
	register_netdevice_notifier(&rtnetlink_dev_notifier);

	rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
	rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
}

在netlink_kernel_create函数中，可以看到内核是如何保存这个用于接收用户空间消息的接收函数的：

/*
 * Create the kernel-side netlink socket for one netlink protocol.
 *
 * @unit:   netlink protocol number; must satisfy 0 <= unit < MAX_LINKS
 * @groups: number of multicast groups recorded in nl_table[unit];
 *          values below 32 are raised to 32
 * @input:  optional receive callback, stored in the socket's
 *          netlink_sock as data_ready (rtnetlink_init() passes
 *          rtnetlink_rcv here)
 * @module: recorded in nl_table[unit].module
 *
 * Returns the new socket's struct sock, or NULL when nl_table is not
 * set up, @unit is out of range, or any of sock_create_lite(),
 * __netlink_create() or netlink_insert() fails.
 */
struct sock *
netlink_kernel_create(int unit, unsigned int groups,
		      void (*input)(struct sock *sk, int len),
		      struct module *module)
{
	struct socket *sock;
	struct sock *sk;
	struct netlink_sock *nlk;

	if (!nl_table)
		return NULL;

	if (unit<0 || unit>=MAX_LINKS)
		return NULL;

	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
		return NULL;

	if (__netlink_create(sock, unit) < 0)
		goto out_sock_release;

	sk = sock->sk;
	sk->sk_data_ready = netlink_data_ready;
	/*
	 * Install the kernel-side receive function for this protocol;
	 * for NETLINK_ROUTE this is rtnetlink_rcv.
	 */
	if (input)
		nlk_sk(sk)->data_ready = input;

	if (netlink_insert(sk, 0))
		goto out_sock_release;

	/* Get the netlink_sock that belongs to this sock. */
	nlk = nlk_sk(sk);
	nlk->flags |= NETLINK_KERNEL_SOCKET;

	/*
	 * Update the netlink_table entry of this protocol; each protocol
	 * has its own netlink_table structure.
	 */
	netlink_table_grab();
	nl_table[unit].groups = groups < 32 ? 32 : groups;
	nl_table[unit].module = module;
	nl_table[unit].registered = 1;
	netlink_table_ungrab();

	return sk;

out_sock_release:
	sock_release(sock);
	return NULL;
}

到此，从内核创建netlink到接收用户空间发送过来的消息，整个流程就清晰了。当我们添加一条新路由时，接收函数rtnetlink_rcv会在循环中通过netlink_run_queue从接收队列中逐个取出消息，并调用实际的接收处理函数，这里为rtnetlink_rcv_msg函数。

/**
 * netlink_run_queue - Process netlink receive queue.
 * @sk: Netlink socket containing the queue
 * @qlen: Place to store queue length upon entry
 * @cb: Callback function invoked for each netlink message found
 *
 * Processes as much as there was in the queue upon entry and invokes
 * a callback function for each netlink message found. The callback
 * function may refuse a message by returning a negative error code
 * but setting the error pointer to 0 in which case this function
 * returns with a qlen != 0.
 *
 * qlen must be initialized to 0 before the initial entry, afterwards
 * the function may be called repeatedly until qlen reaches 0.
 */
void netlink_run_queue(struct sock *sk, unsigned int *qlen,
int (*cb)(struct sk_buff *, struct nlmsghdr *, int *))
{
struct sk_buff *skb;
/*
 * First entry (*qlen == 0), or the stored count is larger than what
 * is queued now: (re)load it from the current queue length.
 */
if (!*qlen || *qlen > skb_queue_len(&sk->sk_receive_queue))
*qlen = skb_queue_len(&sk->sk_receive_queue);
/* Handle at most *qlen buffers; *qlen counts down as they are consumed. */
for (; *qlen; (*qlen)--) {
skb = skb_dequeue(&sk->sk_receive_queue);
/* A non-zero return from netlink_rcv_skb() stops this run. */
if (netlink_rcv_skb(skb, cb)) {
/*
 * Data left in the buffer: put it back at the head of the queue and
 * leave *qlen alone. Otherwise free it and decrement *qlen by hand,
 * because the break below skips the loop's own decrement.
 */
if (skb->len)
skb_queue_head(&sk->sk_receive_queue, skb);
else {
kfree_skb(skb);
(*qlen)--;
}
break;
}
/* netlink_rcv_skb() returned 0: this buffer is finished with. */
kfree_skb(skb);
}
}

下面是rtnetlink_rcv_msg()函数的实现，对netlink消息进行相应的处理。其中有一个数据结构

struct rtnetlink_link *link; 其定义如下，其中包含两个不同的处理函数指针（doit和dumpit）：

/*
 * Handlers for one rtnetlink message type. rtnetlink_rcv_msg() looks
 * one of these up per message and calls whichever member applies;
 * a NULL member makes it retry with the PF_UNSPEC table.
 */
struct rtnetlink_link
{
/*
 * Called by rtnetlink_rcv_msg() for non-dump requests with the skb, the
 * message header and a pointer to the attribute table (rta_buf).
 */
int (*doit)(struct sk_buff *, struct nlmsghdr*, void *attr);
/* Handed to netlink_dump_start() by rtnetlink_rcv_msg() for dump requests. */
int (*dumpit)(struct sk_buff *, struct netlink_callback *cb);
};
/*
 * Process one rtnetlink message.
 *
 * @skb:  buffer the message arrived in
 * @nlh:  header of the message to process
 * @errp: receives the error code whenever a non-zero value is returned
 *
 * Returns 0 for messages that are silently ignored (not a request,
 * control message, too short). Returns -1 with *errp set on failure.
 * Also returns -1 after a dump has been started successfully, in which
 * case *errp is 0. Otherwise returns the result of the doit() handler,
 * which is also stored in *errp.
 */
static __inline__ int
rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
{
	struct rtnetlink_link *link;
	struct rtnetlink_link *link_tab;
	int sz_idx, kind;
	int min_len;
	int family;
	int type;
	int err;

	/* Only requests are handled by kernel now */
	if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
		return 0;

	type = nlh->nlmsg_type;

	/* A control message: ignore them */
	if (type < RTM_BASE)
		return 0;

	/* Unknown message: reply with EINVAL */
	if (type > RTM_MAX)
		goto err_inval;

	type -= RTM_BASE;

	/* All the messages must have at least 1 byte length */
	if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
		return 0;

	family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family;
	if (family >= NPROTO) {
		*errp = -EAFNOSUPPORT;
		return -1;
	}

	/*
	 * Pick the handler table for the family named in the request,
	 * using the PF_UNSPEC table when that family has none.
	 */
	link_tab = rtnetlink_links[family];
	if (link_tab == NULL)
		link_tab = rtnetlink_links[PF_UNSPEC];

	/* Pick the handler pair for this message type, e.g. RTM_NEWROUTE. */
	link = &link_tab[type];

	sz_idx = type >> 2;	/* index into rtm_min[] and rta_max[] */
	kind = type & 3;	/* low two bits of the type index */

	/* Kind 2 is the only kind exempt from the permission check. */
	if (kind != 2 && security_netlink_recv(skb)) {
		*errp = -EPERM;
		return -1;
	}

	/*
	 * NLM_F_DUMP is a composite of two flag bits (NLM_F_ROOT |
	 * NLM_F_MATCH), so a plain AND would be true when only one of
	 * them is set. Require the complete mask before treating the
	 * request as a dump.
	 */
	if (kind == 2 && (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
		if (link->dumpit == NULL)
			link = &(rtnetlink_links[PF_UNSPEC][type]);

		if (link->dumpit == NULL)
			goto err_inval;

		if ((*errp = netlink_dump_start(rtnl, skb, nlh,
						link->dumpit, NULL)) != 0) {
			return -1;
		}

		/* Dump started: *errp is 0 here, yet -1 is returned. */
		netlink_queue_skip(nlh, skb);
		return -1;
	}

	/* Start from an empty attribute table for every message. */
	memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));

	min_len = rtm_min[sz_idx];
	if (nlh->nlmsg_len < min_len)
		goto err_inval;

	if (nlh->nlmsg_len > min_len) {
		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
		struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len);

		/* Record each attribute in slot (type - 1); type 0 is skipped. */
		while (RTA_OK(attr, attrlen)) {
			unsigned flavor = attr->rta_type;
			if (flavor) {
				if (flavor > rta_max[sz_idx])
					goto err_inval;
				rta_buf[flavor-1] = attr;
			}
			attr = RTA_NEXT(attr, attrlen);
		}
	}

	if (link->doit == NULL)
		link = &(rtnetlink_links[PF_UNSPEC][type]);
	if (link->doit == NULL)
		goto err_inval;

	/*
	 * Call the handler registered for this message type; for an
	 * inet6 RTM_NEWROUTE request this is inet6_rtm_newroute().
	 */
	err = link->doit(skb, nlh, (void *)&rta_buf[0]);

	*errp = err;
	return err;

err_inval:
	*errp = -EINVAL;
	return -1;
}
int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct rtmsg *r = NLMSG_DATA(nlh);
struct in6_rtmsg rtmsg;
if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
return -EINVAL;
return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
}

inet6_rtm_newroute函数是通过下面的数组注册的，所以上面的link->doit(skb, nlh, (void *)&rta_buf[0])正是根据这个数组中的表项找到并调用它的。

/*
 * inet6 rtnetlink handlers, indexed by (message type - RTM_BASE), the
 * same index rtnetlink_rcv_msg() uses for its table lookup. Slots and
 * members not listed here stay NULL, and rtnetlink_rcv_msg() then falls
 * back to the PF_UNSPEC table for that message type.
 * NOTE(review): where this table is installed into rtnetlink_links[]
 * is not shown here; presumably under the inet6 family, confirm.
 */
static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
[RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, },
[RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, },
[RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, },
[RTM_GETADDR - RTM_BASE] = { .dumpit = inet6_dump_ifaddr, },
[RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, },
[RTM_GETANYCAST - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, },
[RTM_NEWROUTE - RTM_BASE] = { .doit = inet6_rtm_newroute, },
[RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, },
[RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute,
.dumpit = inet6_dump_fib, },
};