Linux Netlink通信机制详解（下）-yww680169-ChinaUnix博客

yww680169

首页　| 　博文目录　| 　关于我

yww680169

博客访问： 485515
博文数量： 185
博客积分： 10
博客等级：民兵
技术积分： 681
用户组：普通用户
注册时间： 2011-08-06 21:45

个人简介

为梦而战

文章分类

全部博文（185）

vim（1）
python（4）
Doxgen+Graphviz+（1）
C++（6）

boost（1）

关键字详解（2）
代码阅读（3）
C语言基础（1）
java（2）
PF_RING（1）
软件架构设计（17）

设计模式（4）

EA（5）

UML（1）
linux环境搭建（3）
linux内核裁剪记（0）
linux内核裁剪（3）

yww的linux内核裁（2）
虚拟化（43）

KVM（2）

openvswitch（5）

xen（3）

libvirt（7）
网络编程（11）

netlink（5）
linux命令（1）
SR-IOV（2）
snmp（1）
APUE（4）
openssl（6）
数据结构（0）
数据压缩解压缩（1）
高效编程（0）
linux内核学习（23）

IP分片（1）

netfilter（1）

IPsec学习（9）
SVN（0）
Makefile（3）
shell（8）
GNU自动化工具（5）
configure（5）
Linux调试（7）
C语言基础（2）
Linux 内核（17）
APUE（3）
未分配的博文（1）

文章存档

2016年（3）

2015年（103）

2014年（79）

我的朋友

linuxdev

相关博文

Linux Netlink通信机制详解（下）

分类： LINUX

2014-10-12 19:07:51

原文地址：Linux Netlink通信机制详解（下）作者：frankzfz

这里我以路由中的netlink为例，看一下内核中的处理流程是怎样的。在内核源码的net/core/rtnetlink.c文件中，有一个用来接收从用户空间发送过来的Netlink消息的函数。

/*
 * Receive callback for the rtnetlink kernel socket.
 *
 * Each pass calls rtnl_lock(), lets netlink_run_queue() hand the queued
 * messages on @sk to rtnetlink_rcv_msg(), calls up(&rtnl_sem) and then
 * netdev_run_todo(). Passes repeat for as long as netlink_run_queue()
 * leaves a non-zero count behind. @len is not used here.
 */
static void rtnetlink_rcv(struct sock *sk, int len)
{
	unsigned int pending = 0;

	for (;;) {
		rtnl_lock();
		netlink_run_queue(sk, &pending, &rtnetlink_rcv_msg);
		up(&rtnl_sem);

		netdev_run_todo();

		if (!pending)
			break;
	}
}

上面的内核函数就是用来接收用户空间发来的路由相关Netlink消息的：当我们使用route命令添加一条路由时，就会调用该函数进行接收。该函数是在netlink初始化时注册的，注册代码同样位于rtnetlink.c文件中。

/*
 * Initialise rtnetlink.
 *
 * Sizes rta_buf from the largest entry of rta_max[], creates the
 * NETLINK_ROUTE kernel socket with rtnetlink_rcv as its input callback,
 * registers the netdevice notifier and installs link_rtnetlink_table for
 * PF_UNSPEC and PF_PACKET. Panics if the buffer or the socket cannot be
 * obtained.
 */
void __init rtnetlink_init(void)
{
	int idx = 0;

	/* rtattr_max = largest value found in rta_max[] */
	rtattr_max = 0;
	while (idx < ARRAY_SIZE(rta_max)) {
		if (rtattr_max < rta_max[idx])
			rtattr_max = rta_max[idx];
		idx++;
	}

	rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL);
	if (rta_buf == NULL)
		panic("rtnetlink_init: cannot allocate rta_buf\n");

	/*
	 * Creating the kernel-side netlink socket is where the routing
	 * receive function, rtnetlink_rcv, gets registered.
	 */
	rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX,
				     rtnetlink_rcv, THIS_MODULE);
	if (!rtnl)
		panic("rtnetlink_init: cannot initialize rtnetlink\n");

	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
	register_netdevice_notifier(&rtnetlink_dev_notifier);

	rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
	rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
}

在netlink_kernel_create函数中，可以看到内核是如何设置用于接收用户空间消息的接收函数的：

/*
 * netlink_kernel_create - create the kernel-side socket of a netlink protocol
 * @unit:   netlink protocol number; must satisfy 0 <= unit < MAX_LINKS
 * @groups: group count recorded in nl_table[unit]; values below 32 are
 *          raised to 32
 * @input:  optional callback; when non-NULL it is stored as the netlink
 *          socket's data_ready hook
 * @module: module recorded in nl_table[unit]
 *
 * Returns the new struct sock, or NULL when nl_table is not set up, @unit is
 * out of range, or creating/inserting the socket fails (the socket is
 * released on the failure paths that run after it has been created).
 */
struct sock *
netlink_kernel_create(int unit, unsigned int groups,
		      void (*input)(struct sock *sk, int len),
		      struct module *module)
{
	struct socket *sock;
	struct sock *sk;
	struct netlink_sock *nlk;

	if (!nl_table)
		return NULL;

	if (unit<0 || unit>=MAX_LINKS)
		return NULL;

	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
		return NULL;

	if (__netlink_create(sock, unit) < 0)
		goto out_sock_release;

	sk = sock->sk;
	sk->sk_data_ready = netlink_data_ready;
	/*
	 * Install the function through which the kernel receives netlink
	 * messages; for rtnetlink this is rtnetlink_rcv.
	 */
	if (input)
		nlk_sk(sk)->data_ready = input;

	if (netlink_insert(sk, 0))
		goto out_sock_release;

	/* Get the netlink_sock that corresponds to this sock. */
	nlk = nlk_sk(sk);
	nlk->flags |= NETLINK_KERNEL_SOCKET;

	/*
	 * Update the nl_table entry for this protocol (one entry per
	 * protocol number) between netlink_table_grab()/ungrab().
	 */
	netlink_table_grab();
	nl_table[unit].groups = groups < 32 ? 32 : groups;
	nl_table[unit].module = module;
	nl_table[unit].registered = 1;
	netlink_table_ungrab();

	return sk;

out_sock_release:
	sock_release(sock);
	return NULL;
}

到此，内核从创建netlink到接收用户空间发送过来的消息的整个流程就清晰了。那么当我们添加一条新路由时，接收函数rtnetlink_rcv会在循环中调用netlink_run_queue，由它从套接字的接收队列中逐个取出消息，并调用实际的接收处理函数，这里就是rtnetlink_rcv_msg函数。

/**
 * netlink_run_queue - Process netlink receive queue.
 * @sk: Netlink socket containing the queue
 * @qlen: Place to store queue length upon entry
 * @cb: Callback function invoked for each netlink message found
 *
 * Handles at most the number of buffers that were queued upon entry and
 * invokes @cb for each netlink message found. The callback may refuse a
 * message by returning a negative error code while setting the error
 * pointer to 0; in that case this function returns with *qlen != 0.
 *
 * *qlen must be initialized to 0 before the initial entry; afterwards the
 * function may be called repeatedly until *qlen reaches 0.
 */
void netlink_run_queue(struct sock *sk, unsigned int *qlen,
		       int (*cb)(struct sk_buff *, struct nlmsghdr *, int *))
{
	struct sk_buff *skb;

	/* (Re)load the budget on first entry or if the queue has shrunk. */
	if (*qlen == 0 || *qlen > skb_queue_len(&sk->sk_receive_queue))
		*qlen = skb_queue_len(&sk->sk_receive_queue);

	while (*qlen) {
		skb = skb_dequeue(&sk->sk_receive_queue);

		if (netlink_rcv_skb(skb, cb)) {
			/*
			 * Processing stopped early: a buffer that still has
			 * data goes back to the head of the queue without
			 * being counted; an empty one is freed and counted.
			 */
			if (skb->len) {
				skb_queue_head(&sk->sk_receive_queue, skb);
			} else {
				kfree_skb(skb);
				(*qlen)--;
			}
			return;
		}

		kfree_skb(skb);
		(*qlen)--;
	}
}

下面是rtnetlink_rcv_msg()函数的实现，对netlink消息进行相应的处理。其中有一个数据结构

struct rtnetlink_link *link; 其定义如下，它包含两个不同的处理函数指针（doit和dumpit）：

/* Pair of handlers attached to one rtnetlink message type. */
struct rtnetlink_link {
	/* Handler invoked through link->doit(skb, nlh, attrs). */
	int (*doit)(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr);

	/* Handler passed to netlink_dump_start() for dump requests. */
	int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb);
};
/*
 * Process one rtnetlink message.
 *
 * @skb:  buffer the message was received in
 * @nlh:  header of the message to process
 * @errp: out-parameter that receives the error code on the failure paths
 *        and the handler's result on the doit path
 *
 * Returns 0 for messages that are silently ignored (not a request, type
 * below RTM_BASE, or shorter than a struct rtgenmsg payload), -1 on the
 * error paths and after a dump has been started, and otherwise whatever
 * the selected doit handler returned.
 */
static __inline__ int
rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
{
struct rtnetlink_link *link;
struct rtnetlink_link *link_tab;
int sz_idx, kind;
int min_len;
int family;
int type;
int err;
/* Only requests are handled by kernel now */
if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
return 0;
type = nlh->nlmsg_type;
/* A control message: ignore them */
if (type < RTM_BASE)
return 0;
/* Unknown message: reply with EINVAL */
if (type > RTM_MAX)
goto err_inval;
/* From here on, type is an index relative to RTM_BASE. */
type -= RTM_BASE;
/* All the messages must have at least 1 byte length */
if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
return 0;
family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
if (family >= NPROTO) {
*errp = -EAFNOSUPPORT;
return -1;
}
/*
 * Select the handler table by the family that user space put in the
 * message payload; fall back to the PF_UNSPEC table if that family has
 * no table.
 * NOTE(review): the original article says the family is AF_ROUTE or
 * AF_NETLINK in the routing example - confirm; the inet6 handlers shown
 * later in the article would presumably be reached via AF_INET6.
 */
link_tab = rtnetlink_links[family];// per-family handler table
if (link_tab == NULL)
link_tab = rtnetlink_links[PF_UNSPEC];
/* In the article's walkthrough the message type is RTM_NEWROUTE. */
link = &link_tab[type]; // entry for this message type
/*
 * sz_idx (type / 4) indexes rtm_min[] and rta_max[] below; kind is
 * type % 4.
 * NOTE(review): kind == 2 presumably marks the GET message of each
 * group of four types - confirm against the RTM_* definitions.
 */
sz_idx = type>>2;
kind = type&3;
/* Every kind other than 2 must pass security_netlink_recv(). */
if (kind != 2 && security_netlink_recv(skb)) {
*errp = -EPERM;
return -1;
}
/* Dump request: use dumpit, falling back to the PF_UNSPEC table. */
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
if (link->dumpit == NULL)
link = &(rtnetlink_links[PF_UNSPEC][type]);
if (link->dumpit == NULL)
goto err_inval;
if ((*errp = netlink_dump_start(rtnl, skb, nlh,
link->dumpit, NULL)) != 0) {
return -1;
}
/* Dump started: -1 is returned here without touching *errp again. */
netlink_queue_skip(nlh, skb);
return -1;
}
/* Clear the shared attribute pointer table before filling it. */
memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
min_len = rtm_min[sz_idx];
if (nlh->nlmsg_len < min_len)
goto err_inval;
/* Anything beyond min_len is walked as a sequence of rtattr entries. */
if (nlh->nlmsg_len > min_len) {
int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len);
while (RTA_OK(attr, attrlen)) {
unsigned flavor = attr->rta_type;
/* Type 0 is skipped; type N is stored in slot N-1 of rta_buf. */
if (flavor) {
if (flavor > rta_max[sz_idx])
goto err_inval;
rta_buf[flavor-1] = attr;
}
attr = RTA_NEXT(attr, attrlen);
}
}
/* Pick doit, again falling back to the PF_UNSPEC table. */
if (link->doit == NULL)
link = &(rtnetlink_links[PF_UNSPEC][type]);
if (link->doit == NULL)
goto err_inval;
/*
 * Call the handler registered for this message type. For RTM_NEWROUTE
 * with inet6_rtnetlink_table in use, that is inet6_rtm_newroute().
 */
err = link->doit(skb, nlh, (void *)&rta_buf[0]);// run the handler
*errp = err;
return err;
err_inval:
*errp = -EINVAL;
return -1;
}
int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct rtmsg *r = NLMSG_DATA(nlh);
struct in6_rtmsg rtmsg;
if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
return -EINVAL;
return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
}

inet6_rtm_newroute函数是通过下面的数组注册的，所以上面的link->doit(skb, nlh, (void *)&rta_buf[0])就是根据消息类型在下面这个数组中找到对应的处理函数并调用的。

/*
 * IPv6 rtnetlink handlers, indexed by message type relative to RTM_BASE.
 * Entries and members that are not listed stay zero-initialised (NULL).
 */
static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
	[RTM_GETLINK - RTM_BASE] = {
		.dumpit = inet6_dump_ifinfo,
	},
	[RTM_NEWADDR - RTM_BASE] = {
		.doit = inet6_rtm_newaddr,
	},
	[RTM_DELADDR - RTM_BASE] = {
		.doit = inet6_rtm_deladdr,
	},
	[RTM_GETADDR - RTM_BASE] = {
		.dumpit = inet6_dump_ifaddr,
	},
	[RTM_GETMULTICAST - RTM_BASE] = {
		.dumpit = inet6_dump_ifmcaddr,
	},
	[RTM_GETANYCAST - RTM_BASE] = {
		.dumpit = inet6_dump_ifacaddr,
	},
	[RTM_NEWROUTE - RTM_BASE] = {
		.doit = inet6_rtm_newroute,
	},
	[RTM_DELROUTE - RTM_BASE] = {
		.doit = inet6_rtm_delroute,
	},
	[RTM_GETROUTE - RTM_BASE] = {
		.doit = inet6_rtm_getroute,
		.dumpit = inet6_dump_fib,
	},
};