内核中接收网络帧的处理-1158556105-ChinaUnix博客

linux_kernel

首页　| 　博文目录　| 　关于我

1158556105

博客访问： 80376
博文数量： 35
博客积分： 0
博客等级：民兵
技术积分： 140
用户组：普通用户
注册时间： 2015-03-11 10:56

文章分类

全部博文（35）

study metho（1）
On road（1）
uc/os ii（1）
IAR（2）
网络常识（1）
linux驱动源码分（9）
前辈经验（1）
mips内存管理（3）
NIC（12）
未分配的博文（4）

文章存档

2016年（2）

2015年（33）

我的朋友

内核中接收网络帧的处理

我这里描述的只是2层的处理。

首先，我们来看softnet_data这个结构，每个cpu都有这样的一个队列，它主要是用来存储incoming frame。由于它是每个cpu都有一个队列，因此在不同的cpu之间我们就不需要任何锁来控制对这个帧队列的并发处理。我们在操作系统层要取得帧数据，都是通过这个数据结构来读取。

				C代码  
			
				/* 
			
				 * Incoming packets are placed on per-cpu queues so that 
			
				 * no locking is needed. 
			
				 */  
			
				struct softnet_data  
			
				{  
			
				/// Per the original note: "qdisc" is short for queueing discipline (commonly referred to as QoS), i.e. control of outgoing frames. NOTE(review): not exercised by the code shown in this article.  
			
				    struct Qdisc        *output_queue;  
			
				/// Queue of incoming frames: netif_rx() appends skbs here and process_backlog() dequeues them. Per the original note this is only used by non-NAPI drivers; NAPI drivers keep their own private queues.  
			
				    struct sk_buff_head input_pkt_queue;  
			
				/// Head of the list of napi_struct entries that have input pending; net_rx_action() walks this list.  
			
				    struct list_head    poll_list;  
			
				/// Per the original note: list of frames that have already been transmitted successfully. NOTE(review): only its NULL initialisation is visible in this article.  
			
				    struct sk_buff      *completion_queue;  
			
				/// napi_struct used for compatibility with non-NAPI drivers: net_dev_init() sets its poll to process_backlog and netif_rx() schedules it.  
			
				    struct napi_struct  backlog;  
			
				#ifdef CONFIG_NET_DMA  
			
				    struct dma_chan     *net_dma;  
			
				#endif  
			
				};

这张图很好地表示了各cpu的softnet_data结构和网络设备的关系：

接下来我们来看它的初始化：

				C代码  
			
				/// Excerpt of net_dev_init(): only the per-CPU softnet_data initialisation is shown; the dotted lines mark code elided by the article (including the declarations of i and rc).  
			
				static int __init net_dev_init(void)  
			
				{  
			
				..............................  
			
				    for_each_possible_cpu(i) {  
			
				        struct softnet_data *queue;  
			
				/// Fetch the softnet_data of CPU i and initialise it.  
			
				        queue = &per_cpu(softnet_data, i);  
			
				        skb_queue_head_init(&queue->input_pkt_queue);  
			
				        queue->completion_queue = NULL;  
			
				        INIT_LIST_HEAD(&queue->poll_list);  
			
				/// This shows the role of backlog: its poll callback is set to process_backlog. Per the original note, NAPI drivers supply their own poll function while non-NAPI drivers have none, so this acts as their default handler.  
			
				        queue->backlog.poll = process_backlog;  
			
				        queue->backlog.weight = weight_p;  
			
				    }  
			
				...............................................  
			
				    return rc;  
			
				}

当一个新的帧的到达之后，内核处理的函数有两种(其实也就是2层的处理，当处理完后，就将帧扔到3层)：

1 老的netif_rx函数
2 新的napi接口。

首先来介绍napi。

简单来说就是，当内核还在处理一个帧的时候，有另外的帧到来，这时napi不需要再执行中断，而是保持轮询设备的输入队列，从而取得新到的帧；当队列为空时，退出轮询，并重新打开中断。

napi的数据结构：

				C代码  
			
				struct napi_struct {  
			
				    /* The poll_list must only be managed by the entity which 
			
				     * changes the state of the NAPI_STATE_SCHED bit.  This means 
			
				     * whoever atomically sets that bit can add this napi_struct 
			
				     * to the per-cpu poll_list, and whoever clears that bit 
			
				     * can remove from the list right before clearing the bit. 
			
				     */  
			
				/// List node linking this napi into the list of devices with new frames waiting to be processed; the head of that list is softnet_data->poll_list.  
			
				    struct list_head    poll_list;  
			
				/// NAPI state bits (net_rx_action() tests NAPI_STATE_SCHED here).  
			
				    unsigned long       state;  
			
				/// Quota of frames this napi may process; passed to poll() as its second argument by net_rx_action().  
			
				    int         weight;  
			
				/// Callback that fetches data from the device's input queue; net_rx_action() treats its return value as the amount of work done.  
			
				    int         (*poll)(struct napi_struct *, int);  
			
				#ifdef CONFIG_NETPOLL  
			
				    spinlock_t      poll_lock;  
			
				    int         poll_owner;  
			
				    struct net_device   *dev;  
			
				    struct list_head    dev_list;  
			
				#endif  
			
				};

下面的这张图可以看出napi和net_rx_action(软中断处理函数)的关系：

接下来我们来看一下内核如何把老的驱动模型和napi统一到一起，关键的数据结构就是我们上面讲的softnet_data的backlog结构。

先来看下napi和非napi驱动的区别：

点击查看原始大小图片

这里很清楚地看到在非napi的驱动中，要调用netif_rx函数，然后再调用netif_rx_schedule来把所需处理的帧交给软中断处理链表。

我们来看它的实现:

				C代码  
			
				/// netif_rx - append a received skb to the current CPU's softnet_data input queue.  
			
				/// Returns NET_RX_SUCCESS when the skb was queued, and NET_RX_DROP when netpoll_rx() claimed it or the queue length exceeded netdev_max_backlog (in which case the skb is freed here).  
			
				int netif_rx(struct sk_buff *skb)  
			
				{  
			
				    struct softnet_data *queue;  
			
				    unsigned long flags;  
			
				    /* if netpoll wants it, pretend we never saw it */  
			
				    if (netpoll_rx(skb))  
			
				        return NET_RX_DROP;  
			
				/// Record the receive time of the frame if it has not been timestamped yet.  
			
				    if (!skb->tstamp.tv64)  
			
				        net_timestamp(skb);  
			
				    /* 
			
				     * The code is rearranged so that the path is the most 
			
				     * short when CPU is congested, but is still operating. 
			
				     */  
			
				/// Save the local interrupt state into flags; every exit path below calls local_irq_restore(flags).  
			
				    local_irq_save(flags);  
			
				/// Get the softnet_data of the current CPU.  
			
				    queue = &__get_cpu_var(softnet_data);  
			
				/// Increment the total count of frames received on this CPU.  
			
				    __get_cpu_var(netdev_rx_stat).total++;  
			
				/// Check whether the input queue still has room. If qlen is above netdev_max_backlog (heavy congestion, per the original note) the frame is dropped at the bottom of the function.  
			
				    if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {  
			
				        if (queue->input_pkt_queue.qlen) {  
			
				enqueue:  
			
				/// Append the frame to the tail of the softnet_data input queue and return success.  
			
				            __skb_queue_tail(&queue->input_pkt_queue, skb);  
			
				            local_irq_restore(flags);  
			
				            return NET_RX_SUCCESS;  
			
				        }  
			
				/// The queue was empty, which per the original note means it has not been scheduled for the softirq yet, so schedule the backlog napi (whose poll is process_backlog, set in net_dev_init) and then jump back to enqueue the frame.  
			
				        napi_schedule(&queue->backlog);  
			
				        goto enqueue;  
			
				    }  
			
				    __get_cpu_var(netdev_rx_stat).dropped++;  
			
				    local_irq_restore(flags);  
			
				    kfree_skb(skb);  
			
				    return NET_RX_DROP;  
			
				}

然后我们来看网络代码最核心的一个函数net_rx_action 也就是软中断(NET_RX_SOFTIRQ)的处理函数：

				C代码  
			
				/// net_rx_action - walk this CPU's softnet_data.poll_list and call each scheduled napi's poll callback, bounded by netdev_budget and by the jiffy in which the function started. The article describes it as the NET_RX_SOFTIRQ handler. The dotted line near the end marks code elided by the article.  
			
				static void net_rx_action(struct softirq_action *h)  
			
				{  
			
				/// Head of the list of napi entries to poll on this CPU.  
			
				    struct list_head *list = &__get_cpu_var(softnet_data).poll_list;  
			
				/// Time at which processing started.  
			
				    unsigned long start_time = jiffies;  
			
				/// Maximum amount of work to do in this invocation; it must be bounded so that not too much time is spent in this softirq.  
			
				    int budget = netdev_budget;  
			
				    void *have;  
			
				/// Disable local interrupts.  
			
				    local_irq_disable();  
			
				/// Loop over the poll list.  
			
				    while (!list_empty(list)) {  
			
				        struct napi_struct *n;  
			
				        int work, weight;  
			
				/// Stop when the budget is used up or jiffies has advanced since start_time.  
			
				        if (unlikely(budget <= 0 || jiffies != start_time))  
			
				            goto softnet_break;  
			
				        local_irq_enable();  
			
				/// Take the napi_struct at the head of the list.  
			
				        n = list_entry(list->next, struct napi_struct, poll_list);  
			
				/// Take the netpoll poll lock for this napi (a spinlock, per the original note).  
			
				        have = netpoll_poll_lock(n);  
			
				        weight = n->weight;  
			
				        work = 0;  
			
				/// Only call the napi's poll method when NAPI_STATE_SCHED is set; poll returns the number of frames it processed.  
			
				        if (test_bit(NAPI_STATE_SCHED, &n->state))  
			
				            work = n->poll(n, weight);  
			
				        WARN_ON_ONCE(work > weight);  
			
				/// Subtract the work done from the remaining budget.  
			
				        budget -= work;  
			
				        local_irq_disable();  
			
				/// The napi consumed its whole weight: either complete it (if a disable is pending) or requeue it.  
			
				        if (unlikely(work == weight)) {  
			
				            if (unlikely(napi_disable_pending(n)))  
			
				                __napi_complete(n);  
			
				            else  
			
				/// Move this napi to the tail of the poll list so the other entries get their turn.  
			
				                list_move_tail(&n->poll_list, list);  
			
				        }  
			
				        netpoll_poll_unlock(have);  
			
				    }  
			
				out:  
			
				    local_irq_enable();  
			
				......................................  
			
				    return;  
			
				softnet_break:  
			
				/// Budget or time ran out with entries still pending: count the squeeze and re-raise NET_RX_SOFTIRQ.  
			
				    __get_cpu_var(netdev_rx_stat).time_squeeze++;  
			
				    __raise_softirq_irqoff(NET_RX_SOFTIRQ);  
			
				    goto out;  
			
				}

接下来我们来看process_backlog函数，这个函数也就是非napi的驱动的默认poll的实现，napi的驱动的poll的实现，与它大体类似。

				C代码  
			
				/// process_backlog - poll callback installed on softnet_data.backlog by net_dev_init(). Dequeues skbs from the current CPU's input_pkt_queue and hands each one to netif_receive_skb().  
			
				/// Returns the number of skbs processed (at most quota).  
			
				static int process_backlog(struct napi_struct *napi, int quota)  
			
				{  
			
				/// Initial values.  
			
				    int work = 0;  
			
				    struct softnet_data *queue = &__get_cpu_var(softnet_data);  
			
				    unsigned long start_time = jiffies;  
			
				    napi->weight = weight_p;  
			
				/// Processing loop.  
			
				    do {  
			
				        struct sk_buff *skb;  
			
				        local_irq_disable();  
			
				/// Dequeue one skb from the input queue, with local interrupts disabled.  
			
				        skb = __skb_dequeue(&queue->input_pkt_queue);  
			
				        if (!skb) {  
			
				/// The input queue is empty: mark this napi complete and leave the loop.  
			
				            __napi_complete(napi);  
			
				            local_irq_enable();  
			
				            break;  
			
				        }  
			
				        local_irq_enable();  
			
				/// Process the frame: per the original note, do the layer-2 handling and pass it up to layer 3.  
			
				        netif_receive_skb(skb);  
			
				    } while (++work < quota && jiffies == start_time);/// Exit once the quota is used up or jiffies has advanced since start_time.  
			
				    return work;  
			
				}

阅读(949) | 评论(0) | 转发(0) |

上一篇：linux的传统方法和NAPI方法收包

下一篇：网卡驱动11-网卡中的广播地址 0x8000解释

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6