Category: LINUX

2013-11-26 21:39:20

II) The packet-receive path

1. The non-NAPI path
In the interrupt handler ixgb_intr(), ixgb_clean_rx_irq() is called to do the receive processing. The non-NAPI and NAPI paths use a function of the same name here; the NAPI variant simply takes two extra parameters, as the #ifdef at the top of the function below shows:
static boolean_t
#ifdef CONFIG_IXGB_NAPI
ixgb_clean_rx_irq(struct ixgb_adapter *adapter, int *work_done, int work_to_do)
#else
ixgb_clean_rx_irq(struct ixgb_adapter *adapter)
#endif
{
    struct ixgb_desc_ring *rx_ring = &adapter->rx_ring;
    struct net_device *netdev = adapter->netdev;
    struct pci_dev *pdev = adapter->pdev;
    struct ixgb_rx_desc *rx_desc, *next_rxd;
    struct ixgb_buffer *buffer_info, *next_buffer, *next2_buffer;
    uint32_t length;
    unsigned int i, j;
    boolean_t cleaned = FALSE;

    i = rx_ring->next_to_clean;
    rx_desc = IXGB_RX_DESC(*rx_ring, i);
    buffer_info = &rx_ring->buffer_info[i];

    while(rx_desc->status & IXGB_RX_DESC_STATUS_DD) {    /* as analyzed earlier, rx_desc is a descriptor with a coherent DMA mapping, so its status can be read directly to tell whether this interrupt was raised by an arriving packet */
        struct sk_buff *skb, *next_skb;                  /* since the skbs are DMA-mapped, one interrupt can harvest up to rx_ring->count packets, so we walk up to rx_ring->count descriptors here */
        u8 status;

#ifdef CONFIG_IXGB_NAPI
        if(*work_done >= work_to_do)
            break;

        (*work_done)++;
#endif
        status = rx_desc->status;
        skb = buffer_info->skb;
        buffer_info->skb = NULL;

        prefetch(skb->data);

        if(++i == rx_ring->count) i = 0;
        next_rxd = IXGB_RX_DESC(*rx_ring, i);   /* fetch the next descriptor */
        prefetch(next_rxd);

        if((j = i + 1) == rx_ring->count) j = 0;
        next2_buffer = &rx_ring->buffer_info[j];  /* prefetch the buffer after next */
        prefetch(next2_buffer);

        next_buffer = &rx_ring->buffer_info[i];   /* fetch the next buffer */
        next_skb = next_buffer->skb;
        prefetch(next_skb);

        cleaned = TRUE;

        pci_unmap_single(pdev,
                 buffer_info->dma,
                 buffer_info->length,
                 PCI_DMA_FROMDEVICE);     /* unmap, handing the buffer back to the CPU */

        length = le16_to_cpu(rx_desc->length);

        if(unlikely(!(status & IXGB_RX_DESC_STATUS_EOP))) {

            /* All receives must fit into a single buffer */

            IXGB_DBG("Receive packet consumed multiple buffers "
                     "length<%x>\n", length);

            dev_kfree_skb_irq(skb);
            goto rxdesc_done;
        }

        if (unlikely(rx_desc->errors
             & (IXGB_RX_DESC_ERRORS_CE | IXGB_RX_DESC_ERRORS_SE
                | IXGB_RX_DESC_ERRORS_P |
                IXGB_RX_DESC_ERRORS_RXE))) {

            dev_kfree_skb_irq(skb);
            goto rxdesc_done;
        }

        /* code added for copybreak, this should improve
         * performance for small packets with large amounts
         * of reassembly being done in the stack */
#define IXGB_CB_LENGTH 256
        if (length < IXGB_CB_LENGTH) {
            struct sk_buff *new_skb =
             netdev_alloc_skb(netdev, length + NET_IP_ALIGN);
            if (new_skb) {
                skb_reserve(new_skb, NET_IP_ALIGN);
                memcpy(new_skb->data - NET_IP_ALIGN,
                 skb->data - NET_IP_ALIGN,
                 length + NET_IP_ALIGN);
                /* save the skb in buffer_info as good */
                buffer_info->skb = skb;
                skb = new_skb;
            }
        }
        /* end copybreak code */

        /* Good Receive */
        skb_put(skb, length);

        /* Receive Checksum Offload */
        ixgb_rx_checksum(adapter, rx_desc, skb);

        skb->protocol = eth_type_trans(skb, netdev);
#ifdef CONFIG_IXGB_NAPI
        if(adapter->vlgrp && (status & IXGB_RX_DESC_STATUS_VP)) {
            vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
                le16_to_cpu(rx_desc->special) &
                    IXGB_RX_DESC_SPECIAL_VLAN_MASK);
        } else {
            netif_receive_skb(skb);
        }
#else /* CONFIG_IXGB_NAPI */
        if(adapter->vlgrp && (status & IXGB_RX_DESC_STATUS_VP)) {
            vlan_hwaccel_rx(skb, adapter->vlgrp,
                le16_to_cpu(rx_desc->special) &
                    IXGB_RX_DESC_SPECIAL_VLAN_MASK);
        } else {
            netif_rx(skb);    /* the receive entry point used in the non-NAPI case */
        }
#endif /* CONFIG_IXGB_NAPI */
        netdev->last_rx = jiffies;

rxdesc_done:
        /* clean up descriptor, might be written over by hw */
        rx_desc->status = 0;

        /* use prefetched values */
        rx_desc = next_rxd;            /* advance to the next descriptor */
        buffer_info = next_buffer;     /* advance to the next buffer */
    }

    rx_ring->next_to_clean = i;    /* record how far we cleaned */

    ixgb_alloc_rx_buffers(adapter);     /* replenish the skbs consumed above: however many were used, that many are reallocated */

    return cleaned;
}
Let me get the code down first; the inline comments say most of what matters. Next up is netif_rx(skb). Again, code first, with comments added:


int netif_rx(struct sk_buff *skb)
{
    struct softnet_data *queue;
    unsigned long flags;

    /* if netpoll wants it, pretend we never saw it */
    if (netpoll_rx(skb))   /* a quick look shows this only intercepts ARP and UDP packets; the why is left for later analysis */
        return NET_RX_DROP;

    if (!skb->tstamp.off_sec)
        net_timestamp(skb);

    /*
     * The code is rearranged so that the path is the most
     * short when CPU is congested, but is still operating.
     */
    local_irq_save(flags);
    queue = &__get_cpu_var(softnet_data);  /* get the current CPU's softirq queue; every CPU has its own */

    __get_cpu_var(netdev_rx_stat).total++;
    if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {   /* netdev_max_backlog caps the length of the backlog queue; tunable via /proc/sys/net/core/netdev_max_backlog */
        if (queue->input_pkt_queue.qlen) {
enqueue:
            dev_hold(skb->dev);
            __skb_queue_tail(&queue->input_pkt_queue, skb);    /* hang the skb on the input_pkt_queue and return right away, because an interrupt handler must not do much work; the receive softirq will later take skbs off input_pkt_queue and hand them to the upper protocol stack */
            local_irq_restore(flags);
            return NET_RX_SUCCESS;
        }

        netif_rx_schedule(&queue->backlog_dev);  /* hook the backlog_dev device onto the softirq poll list, hand it its quota, and raise the receive softirq; note this is only reached when queue->input_pkt_queue.qlen == 0, i.e. the queue is empty */
        goto enqueue;
    }

    __get_cpu_var(netdev_rx_stat).dropped++;
    local_irq_restore(flags);

    kfree_skb(skb);   /* if the test above fails, i.e. the NIC receives packets faster than we can process them, just drop the packet here */
    return NET_RX_DROP;
}

With this function the receive interrupt handling is complete. To sum up what the interrupt handler does: on each receive interrupt it takes every packet sitting in DMA memory and hangs it on the receive queue; if the queue was empty, it wakes the receive softirq; and if packets arrive faster than they can be processed, they are simply dropped right there. The sketch below shows the pattern in miniature.
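Stripped of the ixgb descriptor-ring details, a non-NAPI receive interrupt handler reduces to something like the following. This is only a sketch: my_hw_read_len() and my_hw_copy_frame() are hypothetical stand-ins for the hardware access, and the ISR signature varies across 2.6.x kernels:

/* hypothetical non-NAPI receive ISR: wrap the frame in an skb, hand it to netif_rx() */
static irqreturn_t my_rx_interrupt(int irq, void *dev_id)
{
    struct net_device *netdev = dev_id;
    struct sk_buff *skb;
    int len = my_hw_read_len(netdev);            /* hypothetical: frame length from hardware */

    skb = netdev_alloc_skb(netdev, len + NET_IP_ALIGN);
    if (!skb)
        return IRQ_HANDLED;                      /* out of memory: the frame is dropped */

    skb_reserve(skb, NET_IP_ALIGN);              /* align the IP header */
    my_hw_copy_frame(netdev, skb_put(skb, len)); /* hypothetical: copy the frame out of DMA memory */

    skb->protocol = eth_type_trans(skb, netdev);
    netif_rx(skb);                               /* enqueue on input_pkt_queue, raise NET_RX_SOFTIRQ */
    netdev->last_rx = jiffies;

    return IRQ_HANDLED;
}

Now on to the receive softirq. First net_dev_init(), which sets up the per-CPU queues and registers the softirq handlers at boot: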



/*
 *    Initialize the DEV module. At boot time this walks the device list and
 *    unhooks any devices that fail to initialise (normally hardware not
 *    present) and leaves us with a valid list of present and active devices.
 *
 */
static int __init net_dev_init(void)
{
    int i, rc = -ENOMEM;

    BUG_ON(!dev_boot_phase);

    if (dev_proc_init())
        goto out;

    if (netdev_sysfs_init())
        goto out;

    INIT_LIST_HEAD(&ptype_all);
    for (i = 0; i < 16; i++)
        INIT_LIST_HEAD(&ptype_base[i]);

    for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
        INIT_HLIST_HEAD(&dev_name_head[i]);

    for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
        INIT_HLIST_HEAD(&dev_index_head[i]);

    /*
     *    Initialise the packet receive queues.
     */

    for_each_possible_cpu(i) {
        struct softnet_data *queue;

        queue = &per_cpu(softnet_data, i);
        skb_queue_head_init(&queue->input_pkt_queue); /* non-NAPI */
        queue->completion_queue = NULL;
        INIT_LIST_HEAD(&queue->poll_list); /* NAPI */
        set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
        queue->backlog_dev.weight = weight_p; /* non-NAPI: default quota of 64 */
        queue->backlog_dev.poll = process_backlog; /* non-NAPI */
        atomic_set(&queue->backlog_dev.refcnt, 1);
    }

    netdev_dma_register();

    dev_boot_phase = 0;

    open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
    open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);

    hotcpu_notifier(dev_cpu_callback, 0);
    dst_init();
    dev_mcast_init();
    rc = 0;
out:
    return rc;
}
All quite self-explanatory. Now for net_rx_action():


/*
 * Each CPU's softirq queue is given a budget; every device has
 * its own quota.  One softirq run works through the budget and,
 * if it runs out, simply raises the softirq again...  All the
 * devices hanging on the poll list eat the budget in quota-sized
 * bites; a device that has eaten its quota is moved to the tail
 * of the list so the other devices get their turn at the budget.
 */
static void net_rx_action(struct softirq_action *h)
{
    struct softnet_data *queue = &__get_cpu_var(softnet_data);
    unsigned long start_time = jiffies;
    int budget = netdev_budget; /* the budget for one softirq run; 300 by default, tunable via /proc/sys/net/core/netdev_budget */
    void *have;

    local_irq_disable();

    while (!list_empty(&queue->poll_list)) {
        struct net_device *dev;

        if (budget <= 0 || jiffies - start_time > 1) /* budget spent, or we have been at it for more than one jiffy: raise the softirq again and bail out */
            goto softnet_break;

        local_irq_enable();

        dev = list_entry(queue->poll_list.next,
                 struct net_device, poll_list); /* take the first device on the poll list */
        have = netpoll_poll_lock(dev);

        if (dev->quota <= 0 || dev->poll(dev, &budget)) {
            /*
             * the quota is used up, or there are still packets left to receive
             */
            netpoll_poll_unlock(have);
            local_irq_disable();
            list_move_tail(&dev->poll_list, &queue->poll_list); /* unlink the device and re-add it at the tail of the poll list */

            /* top the quota back up, then keep polling for the remaining packets */
            if (dev->quota < 0)
                dev->quota += dev->weight;
            else
                dev->quota = dev->weight;
        } else {
            /* fewer packets than the quota arrived, so this round of polling is done; the device re-enables its receive interrupt and waits for more packets */
            netpoll_poll_unlock(have);
            dev_put(dev);
            local_irq_disable();
        }
    }
out:
#ifdef CONFIG_NET_DMA
    /*
     * There may not be any more sk_buffs coming right now, so push
     * any pending DMA copies to hardware
     */
    if (net_dma_client) {
        struct dma_chan *chan;
        rcu_read_lock();
        list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
            dma_async_memcpy_issue_pending(chan);
        rcu_read_unlock();
    }
#endif
    local_irq_enable();
    return;

softnet_break:
    __get_cpu_var(netdev_rx_stat).time_squeeze++;
    __raise_softirq_irqoff(NET_RX_SOFTIRQ);
    goto out;
}
This is the flow of the softirq handler; the NAPI path goes through this very same function with the same logic. The difference is that in non-NAPI mode the struct net_device picked off the poll list is the default backlog_dev initialized above, so dev->poll(dev, &budget) lands in the handler registered there: queue->backlog_dev.poll = process_backlog. In NAPI mode the device on the list is the driver's own, with a poll handler the driver registered itself, as the sketch below illustrates.
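For contrast, here is roughly what a NAPI driver of this kernel generation (pre-2.6.24, when poll/weight/quota still lived in struct net_device) would wire up. The my_* names are hypothetical placeholders, not real driver functions; the fields and helpers (netif_rx_schedule_prep, __netif_rx_schedule, netif_rx_complete) are the real API of this era:

/* NAPI poll handler: called by net_rx_action() via dev->poll() */
static int my_poll(struct net_device *netdev, int *budget)
{
    int work_to_do = min(netdev->quota, *budget);
    int work_done = 0;

    my_clean_rx_irq(netdev, &work_done, work_to_do); /* hypothetical ring cleaner */

    *budget -= work_done;        /* charge the softirq budget */
    netdev->quota -= work_done;  /* and this device's quota */

    if (work_done < work_to_do) {       /* ring drained */
        netif_rx_complete(netdev);      /* take us off the poll list */
        my_enable_rx_irq(netdev);       /* hypothetical: re-arm the receive interrupt */
        return 0;                       /* done: net_rx_action() drops the device */
    }
    return 1;                           /* more work: stay on the poll list */
}

/* at probe time: */
netdev->poll   = my_poll;
netdev->weight = 64;

/* in the receive interrupt handler: */
if (netif_rx_schedule_prep(netdev)) {
    my_disable_rx_irq(netdev);          /* hypothetical: mask the receive interrupt */
    __netif_rx_schedule(netdev);        /* hook onto poll_list, raise NET_RX_SOFTIRQ */
}

With that contrast in mind, process_backlog is all that is left to look at: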


static int process_backlog(struct net_device *backlog_dev, int *budget)
{
    int work = 0;
    int quota = min(backlog_dev->quota, *budget);  /* work on at most min(quota, budget) packets */
    struct softnet_data *queue = &__get_cpu_var(softnet_data);
    unsigned long start_time = jiffies;

    backlog_dev->weight = weight_p;
    for (;;) {
        struct sk_buff *skb;
        struct net_device *dev;

        local_irq_disable();
        skb = __skb_dequeue(&queue->input_pkt_queue);  /* take a packet off the queue that netif_rx() filled */
        if (!skb)
            goto job_done;
        local_irq_enable();

        dev = skb->dev;

        netif_receive_skb(skb);  /* hand it up to the protocol stack */

        dev_put(dev);

        work++;

        if (work >= quota || jiffies - start_time > 1)
            break;

    }

    backlog_dev->quota -= work;
    *budget -= work;
    return -1;   /* nonzero: not finished, net_rx_action() keeps us on the poll list */

job_done:
    backlog_dev->quota -= work;
    *budget -= work;

    list_del(&backlog_dev->poll_list);   /* queue drained: take backlog_dev off the poll list */
    smp_mb__before_clear_bit();
    netif_poll_enable(backlog_dev);

    local_irq_enable();
    return 0;
}

Here we see the packet pulled off the receive queue (skb = __skb_dequeue(&queue->input_pkt_queue);), handed to the upper protocol stack (netif_receive_skb(skb);), and the quota and budget charged for the work done (backlog_dev->quota -= work; *budget -= work;).
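One loose end worth tying up: netif_receive_skb() delivers the skb to whichever protocol handlers registered themselves in the ptype_base lists that net_dev_init() initialized above, hashed on skb->protocol. As a rough sketch of how that registration looks, this is approximately how IPv4 hooks in (based on af_inet.c of this kernel era):

static struct packet_type ip_packet_type = {
    .type = __constant_htons(ETH_P_IP),
    .func = ip_rcv,              /* invoked by netif_receive_skb() for IPv4 frames */
};

dev_add_pack(&ip_packet_type);   /* hashes on .type into ptype_base[] */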



over ...


