1 pppoe kernel 分析 【2.6.16】
static int __init pppoe_init(void)
{
int err = proto_register(&pppoe_sk_proto, 0);
//注册socket协议,协议名称PPPOE
if (err)
goto out;
err = register_pppox_proto(PX_PROTO_OE, &pppoe_proto);
/*注册pppoe协议类型,这个是在pppox内部注册
这个注册后将为socket提高一个重要的调用 pppoe_create(struct socket *sock)
在这个函数中
sk->sk_backlog_rcv = pppoe_rcv_core;
//这个函数提供了接收报文处理函数,
当sk_state置PPPOX_BOUND标志位的话,这个报文将首先在内核处理ppp_input(),
如果没有改标志位的话交到sock接收队列sock_queue_rcv_skb(sk, skb)。将在用户空间进行处理
sk->sk_state = PPPOX_NONE;
sk->sk_type = SOCK_STREAM;
sk->sk_family = PF_PPPOX;
sk->sk_protocol = PX_PROTO_OE;
*/
if (err)
goto out_unregister_pppoe_proto;
err = pppoe_proc_init();
if (err)
goto out_unregister_pppox_proto;
dev_add_pack(&pppoes_ptype);
dev_add_pack(&pppoed_ptype);
register_netdevice_notifier(&pppoe_notifier);
out:
return err;
out_unregister_pppox_proto:
unregister_pppox_proto(PX_PROTO_OE);
out_unregister_pppoe_proto:
proto_unregister(&pppoe_sk_proto);
goto out;
}
2 ppp Generic
ppp_input() ,这个函数将判断ppp报文是链路控制报文还是数据报文来决定处理方式
如果为 Link Control Protocol 那么就会将包围挂到pch->file.rq ,pppd来负责接收控制报文的队列
如果为其他类型的,将通过ppp_do_recv 来对报文(如果是压缩报文:解压),去ppp头。 最后调用netif_rx(skb);
来实现ppp网口收报文的处理。
3 netif_rx 的作用?
{
struct softnet_data *queue;
unsigned long flags;
/* if netpoll wants it, pretend we never saw it */
if (netpoll_rx(skb))
return NET_RX_DROP;
//首先如果存在netpoll启用netpoll机制来接收处理报文
if (!skb->tstamp.off_sec)
net_timestamp(skb);
// 如果没有注册netpoll处理函数,则放到cpu的接收队列
/*
* The code is rearranged so that the path is the most
* short when CPU is congested, but is still operating.
*/
local_irq_save(flags);
queue = &__get_cpu_var(softnet_data);
__get_cpu_var(netdev_rx_stat).total++;
if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
if (queue->input_pkt_queue.qlen) {
enqueue:
dev_hold(skb->dev);
__skb_queue_tail(&queue->input_pkt_queue, skb);
local_irq_restore(flags);
return NET_RX_SUCCESS;
}
//数据报文放到cpu接收队列,由cpu默认net_dev提供的poll函数即:process_backlog处理报文
//下面引发软中断
netif_rx_schedule(&queue->backlog_dev);
goto enqueue;
}
__get_cpu_var(netdev_rx_stat).dropped++;
local_irq_restore(flags);
kfree_skb(skb);
return NET_RX_DROP;
}
4 疑问pppoe为什么还要占用pty/tty设备文件呢?
5 netif_receive_skb() 函数分析
负责把数据报文按packet_type丢到上层协议处理, 所有数据往上送到协议栈的必经
之路
ptype_all 优先处理,这个是RAW socket走过的路径之地,socket没打开一个会加到这个链表中来。
处理过程数据包被复制一份处理。
可以看到桥模式处理在真正的数据报文处理之前
int netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
int ret = NET_RX_DROP;
unsigned short type;
/* if we've gotten here through NAPI, check netpoll */
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;
if (!skb->tstamp.off_sec)
net_timestamp(skb);
if (!skb->input_dev)
skb->input_dev = skb->dev;
orig_dev = skb_bond(skb);
__get_cpu_var(netdev_rx_stat).total++;
skb->h.raw = skb->nh.raw = skb->data;
skb->mac_len = skb->nh.raw - skb->mac.raw;
pt_prev = NULL;
rcu_read_lock();
#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
goto ncls;
}
#endif
// RAW socket 优先处理, pppoe discovery 要走这里
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}
#ifdef CONFIG_NET_CLS_ACT
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL; /* noone else should process this after*/
} else {
skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
}
ret = ing_filter(skb);
if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
kfree_skb(skb);
goto out;
}
skb->tc_verd = 0;
ncls:
#endif
handle_diverter(skb);
//先处理桥模式情况
if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
goto out;
type = skb->protocol;
//再如果桥模式没有转发掉,进入路由模式处理
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15],
list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}
if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
ret = NET_RX_DROP;
}
out:
rcu_read_unlock();
return ret;
}
6 net_dev_init 这个函数告诉我们很多东西
static int __init net_dev_init(void)
{
int i, rc = -ENOMEM;
BUG_ON(!dev_boot_phase);
net_random_init();
if (dev_proc_init())
goto out;
if (netdev_sysfs_init())
goto out;
INIT_LIST_HEAD(&ptype_all);
for (i = 0; i < 16; i++)
INIT_LIST_HEAD(&ptype_base[i]);
for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
INIT_HLIST_HEAD(&dev_name_head[i]);
for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
INIT_HLIST_HEAD(&dev_index_head[i]);
/*
* Initialise the packet receive queues.
*/
for_each_cpu(i) {
struct softnet_data *queue;
queue = &per_cpu(softnet_data, i);
skb_queue_head_init(&queue->input_pkt_queue);
queue->completion_queue = NULL;
INIT_LIST_HEAD(&queue->poll_list);
set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
queue->backlog_dev.weight = weight_p;
//cpu默认处理接收队列的函数在此
queue->backlog_dev.poll = process_backlog;
atomic_set(&queue->backlog_dev.refcnt, 1);
}
dev_boot_phase = 0;
// 收发报文软中断在此
open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
open_softirq(NET_RX_SOFTIRQ, net_rx_action,
NULL);
hotcpu_notifier(dev_cpu_callback, 0);
dst_init();
dev_mcast_init();
rc = 0;
out:
return rc;
}
接着看看下面的几个调用
static int process_backlog(struct
net_device *backlog_dev, int *budget)
{
int work = 0;
int quota = min(backlog_dev->quota, *budget);
//取到cpu接收队列
struct softnet_data *queue = &__get_cpu_var(softnet_data);
unsigned long start_time = jiffies;
backlog_dev->weight = weight_p;
for (;;) {
struct sk_buff *skb;
struct net_device *dev;
local_irq_disable();
skb = __skb_dequeue(&queue->input_pkt_queue);
if (!skb)
goto job_done;
local_irq_enable();
dev = skb->dev;
//上送到协议栈进行数据报文,从此结束底层的绕来绕去
netif_receive_skb(skb);
dev_put(dev);
work++;
if (work >= quota || jiffies - start_time > 1)
break;
}
backlog_dev->quota -= work;
*budget -= work;
return -1;
job_done:
backlog_dev->quota -= work;
*budget -= work;
list_del(&backlog_dev->poll_list);
smp_mb__before_clear_bit();
netif_poll_enable(backlog_dev);
local_irq_enable();
return 0;
}
//
static void net_rx_action(struct
softirq_action *h)
{
struct softnet_data *queue = &__get_cpu_var(softnet_data);
unsigned long start_time = jiffies;
int budget = netdev_budget;
void *have;
local_irq_disable();
while (!list_empty(&queue->poll_list)) {
struct net_device *dev;
if (budget <= 0 || jiffies - start_time > 1)
goto softnet_break;
local_irq_enable();
dev = list_entry(queue->poll_list.next,
struct net_device, poll_list);
have = netpoll_poll_lock(dev);
//调用设备的poll函数来掉netif_receive_skb()提交到上层协议
if (dev->quota <= 0 || dev->poll(dev, &budget)) {
netpoll_poll_unlock(have);
local_irq_disable();
//如果某个设备级别太低,放在对尾处理
list_del(&dev->poll_list);
list_add_tail(&dev->poll_list, &queue->poll_list);
if (dev->quota < 0)
dev->quota += dev->weight;
else
dev->quota = dev->weight;
} else {
netpoll_poll_unlock(have);
dev_put(dev);
local_irq_disable();
}
}
out:
local_irq_enable();
return;
softnet_break:
// 时间太久,超过1个时钟滴答退出
__get_cpu_var(netdev_rx_stat).time_squeeze++;
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
goto out;
}
阅读(2338) | 评论(0) | 转发(0) |