这里我以路由中的netlink为例,看一下内核中的处理流程是怎样的。在/kernel/net/core/rtnetlink.c文件中,有一个接收从用户空间传过来的Netlink消息的函数:
- static void rtnetlink_rcv(struct sock *sk, int len)
-
{
-
unsigned int qlen = 0;
-
-
do {
-
rtnl_lock();
-
netlink_run_queue(sk, &qlen, &rtnetlink_rcv_msg);
-
up(&rtnl_sem);
-
-
netdev_run_todo();
-
} while (qlen);
-
}
上面的内核函数就是用来接收用户空间路由方面Netlink消息的,当我们使用route命令添加一条路由时,就会调用该函数进行接收。该函数是在netlink初始化时注册的,同样位于rtnetlink.c文件中。
- void __init rtnetlink_init(void)
-
{
-
int i;
-
-
rtattr_max = 0;
-
for (i = 0; i < ARRAY_SIZE(rta_max); i++)
-
if (rta_max[i] > rtattr_max)
-
rtattr_max = rta_max[i];
-
rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL);
-
if (!rta_buf)
-
panic("rtnetlink_init: cannot allocate rta_buf\n");
-
-
rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
-
THIS_MODULE);//在创建内核的netlink时,注册了路由netlink的接收函数,rtnetlink_rcv.
-
if (rtnl == NULL)
-
panic("rtnetlink_init: cannot initialize rtnetlink\n");
-
netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
-
register_netdevice_notifier(&rtnetlink_dev_notifier);
-
rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
-
rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
-
}
在netlink_kernel_create函数中,可以看到内核是如何设置接收用户空间消息的接收函数的:
- struct sock *
-
netlink_kernel_create(int unit, unsigned int groups,
-
void (*input)(struct sock *sk, int len),
-
struct module *module)
-
{
-
struct socket *sock;
-
struct sock *sk;
-
struct netlink_sock *nlk;
-
-
if (!nl_table)
-
return NULL;
-
-
if (unit<0 || unit>=MAX_LINKS)
-
return NULL;
-
-
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
-
return NULL;
-
-
if (__netlink_create(sock, unit) < 0)
-
goto out_sock_release;
-
-
sk = sock->sk;
-
sk->sk_data_ready = netlink_data_ready;
-
if (input)
-
nlk_sk(sk)->data_ready = input;//设置内核接收Netlink消息的函数,这里就是前面的rtnetlink_rcv函数
-
-
if (netlink_insert(sk, 0))
-
goto out_sock_release;
-
-
nlk = nlk_sk(sk); //取得sock嵌入的netlink_sock结构体
-
nlk->flags |= NETLINK_KERNEL_SOCKET;
-
-
netlink_table_grab();
-
nl_table[unit].groups = groups < 32 ? 32 : groups;
-
nl_table[unit].module = module;
-
nl_table[unit].registered = 1;// 更新netlink_table结构体信息,每中协议对应一个netlink_
-
table结构
-
netlink_table_ungrab();
-
-
return sk;
-
-
out_sock_release:
-
sock_release(sock);
-
return NULL;
-
}
到此,从内核创建netlink到接收用户空间发送过来的消息,整个流程就清晰了。那么当我们添加一条新路由时,在接收函数rtnetlink_rcv的循环中,会通过netlink_run_queue从接收队列中逐个取出消息,并调用实际的接收处理函数,这里为rtnetlink_rcv_msg函数。
- /**
-
* nelink_run_queue - Process netlink receive queue.
-
* @sk: Netlink socket containing the queue
-
* @qlen: Place to store queue length upon entry
-
* @cb: Callback function invoked for each netlink message found
-
*
-
* Processes as much as there was in the queue upon entry and invokes
-
* a callback function for each netlink message found. The callback
-
* function may refuse a message by returning a negative error code
-
* but setting the error pointer to 0 in which case this function
-
* returns with a qlen != 0.
-
*
-
* qlen must be initialized to 0 before the initial entry, afterwards
-
* the function may be called repeatedly until qlen reaches 0.
-
*/
-
void netlink_run_queue(struct sock *sk, unsigned int *qlen,
-
int (*cb)(struct sk_buff *, struct nlmsghdr *, int *))
-
{
-
struct sk_buff *skb;
-
-
if (!*qlen || *qlen > skb_queue_len(&sk->sk_receive_queue))
-
*qlen = skb_queue_len(&sk->sk_receive_queue);
-
-
for (; *qlen; (*qlen)--) {
-
skb = skb_dequeue(&sk->sk_receive_queue);
-
if (netlink_rcv_skb(skb, cb)) {
-
if (skb->len)
-
skb_queue_head(&sk->sk_receive_queue, skb);
-
else {
-
kfree_skb(skb);
-
(*qlen)--;
-
}
-
break;
-
}
-
-
kfree_skb(skb);
-
}
-
}
下面是rtnetlink_rcv_msg()函数的实现,它对netlink消息进行相应的处理。其中用到一个数据结构struct rtnetlink_link(函数中的局部变量为struct rtnetlink_link *link;),其定义如下,包含两个不同的处理函数指针(doit和dumpit):
/*
 * Per-message-type handler pair used by rtnetlink dispatch tables.
 * Either member may be left NULL when a message type has no such handler.
 */
struct rtnetlink_link
{
	/* Handler invoked for a single request message. */
	int (*doit)(struct sk_buff *, struct nlmsghdr*, void *attr);
	/* Handler passed to netlink_dump_start() for dump requests. */
	int (*dumpit)(struct sk_buff *, struct netlink_callback *cb);
};
/* Process one rtnetlink message. */
-
-
static __inline__ int
-
rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
-
{
-
struct rtnetlink_link *link;
-
struct rtnetlink_link *link_tab;
-
int sz_idx, kind;
-
int min_len;
-
int family;
-
int type;
-
int err;
-
-
/* Only requests are handled by kernel now */
-
if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
-
return 0;
-
type = nlh->nlmsg_type;
-
/* A control message: ignore them */
-
if (type < RTM_BASE)
-
return 0;
-
/* Unknown message: reply with EINVAL */
-
if (type > RTM_MAX)
-
goto err_inval;
-
type -= RTM_BASE;
-
/* All the messages must have at least 1 byte length */
-
if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
-
return 0;
-
family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
-
if (family >= NPROTO) {
-
*errp = -EAFNOSUPPORT;
-
return -1;
-
}
-
-
link_tab = rtnetlink_links[family];//根据用户空间传过来的不同德family类型,调用不同的处理函数,这里以路由为例的话为AF_ROUTE或者AF_NETLINK
-
if (link_tab == NULL)
-
link_tab = rtnetlink_links[PF_UNSPEC];
-
link = &link_tab[type]; //根据不同的type调用不同的处理函数。这里的type为RTM_NEWROUTE
-
-
sz_idx = type>>2;
-
kind = type&3;
-
-
if (kind != 2 && security_netlink_recv(skb)) {
-
*errp = -EPERM;
-
return -1;
-
}
-
-
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
-
if (link->dumpit == NULL)
-
link = &(rtnetlink_links[PF_UNSPEC][type]);
-
-
if (link->dumpit == NULL)
-
goto err_inval;
-
-
if ((*errp = netlink_dump_start(rtnl, skb, nlh,
-
link->dumpit, NULL)) != 0) {
-
return -1;
-
}
-
-
netlink_queue_skip(nlh, skb);
-
return -1;
-
}
-
-
memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
-
-
min_len = rtm_min[sz_idx];
-
if (nlh->nlmsg_len < min_len)
-
goto err_inval;
-
-
if (nlh->nlmsg_len > min_len) {
-
int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
-
struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len);
-
-
while (RTA_OK(attr, attrlen)) {
-
unsigned flavor = attr->rta_type;
-
if (flavor) {
-
if (flavor > rta_max[sz_idx])
-
goto err_inval;
-
rta_buf[flavor-1] = attr;
-
}
-
attr = RTA_NEXT(attr, attrlen);
-
}
-
}
-
-
if (link->doit == NULL)
-
link = &(rtnetlink_links[PF_UNSPEC][type]);
-
if (link->doit == NULL)
-
goto err_inval;
-
err = link->doit(skb, nlh, (void *)&rta_buf[0]);//此处调用RTM_NEWROUTE,对应的route处理函数,也就是下面的inet6_rtm_newroute函数。
-
-
*errp = err;
-
return err;
-
-
err_inval:
-
*errp = -EINVAL;
-
return -1;
-
}
-
int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
-
{
-
struct rtmsg *r = NLMSG_DATA(nlh);
-
struct in6_rtmsg rtmsg;
-
-
if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
-
return -EINVAL;
-
return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
-
}
inet6_rtm_newroute函数通过下面的数组进行了相应的注册,所以上面的link->doit(skb, nlh, (void *)&rta_buf[0])就是根据下面这个表来调用的。
- static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
-
[RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, },
-
[RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, },
-
[RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, },
-
[RTM_GETADDR - RTM_BASE] = { .dumpit = inet6_dump_ifaddr, },
-
[RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, },
-
[RTM_GETANYCAST - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, },
-
[RTM_NEWROUTE - RTM_BASE] = { .doit = inet6_rtm_newroute, },
-
[RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, },
-
[RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute,
-
.dumpit = inet6_dump_fib, },
-
};
相关的结构体:
内核中所有的netlink套接字存储在一个全局的哈希表中,该结构定义如下:
static struct netlink_table *nl_table;
其中每个协议对应一个哈希表,同一种协议的所有数据报都散列在同一个哈希表中。
下面为一种协议所对应的哈希表结构:
struct netlink_table {
	struct nl_pid_hash hash;	// 根据pid进行HASH的netlink sock链表, 相当于客户端链表
	struct hlist_head mc_list;	// 多播的sock链表
	unsigned int nl_nonroot;	// 监听者标志
	unsigned int groups;		// 每个netlink的协议类型可以定义多个组, 8的倍数,最小是32
	struct module *module;
	int registered;
};
最大可有MAX_LINKS(32)个表,处理不同协议类型的netlink套接口。注意由于是本机自身的通信,本机同时作为服务器和客户端:服务端需要一个套接口对应,每个客户端也要有一个套接口对应,多个客户端的套接口形成一个链表。
- struct hlist_head *table; // 链表节点,每个桶中协议的sock连入其中,根据哈希值可得确定
-
的sock
-
unsigned long rehash_time; // 重新计算HASH的时间间隔
-
-
unsigned int mask;
-
unsigned int shift;
-
-
unsigned int entries; // 链表节点数
-
unsigned int max_shift; // 最大幂值
-
u32 rnd; // 随机数
-
};
proto_ops结构定义在kernel/include/linux/net.h中:
- struct proto_ops {
-
int family;
-
struct module *owner;
-
int (*release) (struct socket *sock);
-
int (*bind) (struct socket *sock,
-
struct sockaddr *myaddr,
-
int sockaddr_len);
-
int (*connect) (struct socket *sock,
-
struct sockaddr *vaddr,
-
int sockaddr_len, int flags);
-
int (*socketpair)(struct socket *sock1,
-
struct socket *sock2);
-
int (*accept) (struct socket *sock,
-
struct socket *newsock, int flags);
-
int (*getname) (struct socket *sock,
-
struct sockaddr *addr,
-
int *sockaddr_len, int peer);
-
unsigned int (*poll) (struct file *file, struct socket *sock,
-
struct poll_table_struct *wait);
-
int (*ioctl) (struct socket *sock, unsigned int cmd,
-
unsigned long arg);
-
int (*listen) (struct socket *sock, int len);
-
int (*shutdown) (struct socket *sock, int flags);
-
int (*setsockopt)(struct socket *sock, int level,
-
int optname, char __user *optval, int optlen);
-
int (*getsockopt)(struct socket *sock, int level,
-
int optname, char __user *optval, int __user *optlen);
-
int (*sendmsg) (struct kiocb *iocb, struct socket *sock,//netlink套接字实际的发送与接收函数
-
struct msghdr *m, size_t total_len);
-
int (*recvmsg) (struct kiocb *iocb, struct socket *sock,
-
struct msghdr *m, size_t total_len,
-
int flags);
-
int (*mmap) (struct file *file, struct socket *sock,
-
struct vm_area_struct * vma);
-
ssize_t (*sendpage) (struct socket *sock, struct page *page,
-
int offset, size_t size, int flags);
-
};
下面我们看看,当我们使用route命令添加一条新的路由时,函数的调用顺序是怎么样的。下面是主要的函数:
Dput()
sys_sendmsg()//内核的接收函数
new_inode()
netlink_sendmsg//内核态接收用户态发送的数据
rtnetlink_rcv()
netlink_run_queue()
rtnetlink_rcv_msg()
inet6_rtm_newroute()
在kernel/net/netlink/af_netlink.c文件中,内核态接收用户态发送的数据,在netlink_sendskb函数中调用sock的队列,执行相应的netlink接收函数
- static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
-
struct msghdr *msg, size_t len)
-
{
-
struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
-
struct sock *sk = sock->sk;
-
struct netlink_sock *nlk = nlk_sk(sk);
-
struct sockaddr_nl *addr=msg->msg_name;
-
u32 dst_pid;
-
u32 dst_group;
-
struct sk_buff *skb;
-
int err;
-
struct scm_cookie scm;
-
-
if (msg->msg_flags&MSG_OOB)
-
return -EOPNOTSUPP;
-
-
if (NULL == siocb->scm)
-
siocb->scm = &scm;
-
err = scm_send(sock, msg, siocb->scm);
-
if (err < 0)
-
return err;
-
-
if (msg->msg_namelen) {
-
if (addr->nl_family != AF_NETLINK)
-
return -EINVAL;
-
dst_pid = addr->nl_pid;
-
dst_group = ffs(addr->nl_groups);
-
if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
-
return -EPERM;
-
} else {
-
dst_pid = nlk->dst_pid;
-
dst_group = nlk->dst_group;
-
}
-
-
if (!nlk->pid) {
-
err = netlink_autobind(sock);
-
if (err)
-
goto out;
-
}
-
-
err = -EMSGSIZE;
-
if (len > sk->sk_sndbuf - 32)
-
goto out;
-
err = -ENOBUFS;
-
skb = alloc_skb(len, GFP_KERNEL);// 分配一个sk_buff结构,将msghdr结构转化为sk_buff结构
-
if (skb==NULL)
-
goto out;
-
-
NETLINK_CB(skb).pid = nlk->pid;//填写本地的pid信息
-
NETLINK_CB(skb).dst_pid = dst_pid;
-
NETLINK_CB(skb).dst_group = dst_group;
-
NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
-
memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-
-
/* What can I do? Netlink is asynchronous, so that
-
we will have to save current capabilities to
-
check them, when this message will be delivered
-
to corresponding kernel module. --ANK (980802)
-
*/
-
-
err = -EFAULT;
-
//数据拷贝进sk_buff中
-
if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
-
kfree_skb(skb);
-
goto out;
-
}
-
-
err = security_netlink_send(sk, skb);
-
if (err) {
-
kfree_skb(skb);
-
goto out;
-
}
-
-
if (dst_group) {
-
atomic_inc(&skb->users);
-
netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
-
}
-
err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
-
-
out:
-
return err;
-
}
阅读(1504) | 评论(0) | 转发(0) |