II) The receive path
1. The non-NAPI case
In the interrupt handler ixgb_intr(), ixgb_clean_rx_irq() is called to do the actual receive processing. The non-NAPI and NAPI builds share the same function name; the NAPI variant simply takes two extra parameters:
static boolean_t
#ifdef CONFIG_IXGB_NAPI
ixgb_clean_rx_irq(struct ixgb_adapter *adapter, int *work_done, int work_to_do)
#else
ixgb_clean_rx_irq(struct ixgb_adapter *adapter)
#endif
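For context, the call sites inside ixgb_intr() differ as well. Roughly, abridged from the same era's ixgb_main.c (treat the exact register write and loop bound as quoted from memory):

#ifdef CONFIG_IXGB_NAPI
        if(netif_rx_schedule_prep(netdev)) {
                /* mask the NIC's interrupts, then hand the device to the
                 * per-CPU poll list; the real work happens in the softirq */
                atomic_inc(&adapter->irq_sem);
                IXGB_WRITE_REG(&adapter->hw, IMC, ~0);
                __netif_rx_schedule(netdev);
        }
#else
        /* non-NAPI: clean the rings right here, in hard-IRQ context */
        for(i = 0; i < IXGB_MAX_INTR; i++)
                if(!ixgb_clean_rx_irq(adapter) &
                   !ixgb_clean_tx_irq(adapter))
                        break;
#endif

Here is the full function: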
static boolean_t
#ifdef CONFIG_IXGB_NAPI
ixgb_clean_rx_irq(struct ixgb_adapter *adapter, int *work_done, int work_to_do)
#else
ixgb_clean_rx_irq(struct ixgb_adapter *adapter)
#endif
{
        struct ixgb_desc_ring *rx_ring = &adapter->rx_ring;
        struct net_device *netdev = adapter->netdev;
        struct pci_dev *pdev = adapter->pdev;
        struct ixgb_rx_desc *rx_desc, *next_rxd;
        struct ixgb_buffer *buffer_info, *next_buffer, *next2_buffer;
        uint32_t length;
        unsigned int i, j;
        boolean_t cleaned = FALSE;

        i = rx_ring->next_to_clean;
        rx_desc = IXGB_RX_DESC(*rx_ring, i);
        buffer_info = &rx_ring->buffer_info[i];

        /* As analyzed earlier, rx_desc lives in coherently DMA-mapped memory,
         * so the CPU can read its status directly to tell whether this
         * interrupt was raised by an arriving packet. Since the skbs are
         * streaming-DMA mapped, one interrupt may have completed up to
         * rx_ring->count packets, so we walk the ring descriptor by
         * descriptor. */
        while(rx_desc->status & IXGB_RX_DESC_STATUS_DD) {
                struct sk_buff *skb, *next_skb;
                u8 status;

#ifdef CONFIG_IXGB_NAPI
                if(*work_done >= work_to_do)
                        break;

                (*work_done)++;
#endif
                status = rx_desc->status;
                skb = buffer_info->skb;
                buffer_info->skb = NULL;

                prefetch(skb->data);

                if(++i == rx_ring->count) i = 0;
                next_rxd = IXGB_RX_DESC(*rx_ring, i);   /* the next descriptor */
                prefetch(next_rxd);

                if((j = i + 1) == rx_ring->count) j = 0;
                next2_buffer = &rx_ring->buffer_info[j];        /* prefetch the buffer after next */
                prefetch(next2_buffer);

                next_buffer = &rx_ring->buffer_info[i]; /* the next buffer */
                next_skb = next_buffer->skb;
                prefetch(next_skb);

                cleaned = TRUE;

                pci_unmap_single(pdev,
                                 buffer_info->dma,
                                 buffer_info->length,
                                 PCI_DMA_FROMDEVICE);   /* hand the buffer back to the CPU */

                length = le16_to_cpu(rx_desc->length);

                if(unlikely(!(status & IXGB_RX_DESC_STATUS_EOP))) {

                        /* All receives must fit into a single buffer */

                        IXGB_DBG("Receive packet consumed multiple buffers "
                                 "length<%x>\n", length);

                        dev_kfree_skb_irq(skb);
                        goto rxdesc_done;
                }

                if (unlikely(rx_desc->errors
                             & (IXGB_RX_DESC_ERRORS_CE | IXGB_RX_DESC_ERRORS_SE
                                | IXGB_RX_DESC_ERRORS_P |
                                IXGB_RX_DESC_ERRORS_RXE))) {

                        dev_kfree_skb_irq(skb);
                        goto rxdesc_done;
                }

                /* code added for copybreak, this should improve
                 * performance for small packets with large amounts
                 * of reassembly being done in the stack */
#define IXGB_CB_LENGTH 256
                if (length < IXGB_CB_LENGTH) {
                        struct sk_buff *new_skb =
                                netdev_alloc_skb(netdev, length + NET_IP_ALIGN);
                        if (new_skb) {
                                skb_reserve(new_skb, NET_IP_ALIGN);
                                memcpy(new_skb->data - NET_IP_ALIGN,
                                       skb->data - NET_IP_ALIGN,
                                       length + NET_IP_ALIGN);
                                /* save the skb in buffer_info as good */
                                buffer_info->skb = skb;
                                skb = new_skb;
                        }
                }
                /* end copybreak code */

                /* Good Receive */
                skb_put(skb, length);

                /* Receive Checksum Offload */
                ixgb_rx_checksum(adapter, rx_desc, skb);

                skb->protocol = eth_type_trans(skb, netdev);
#ifdef CONFIG_IXGB_NAPI
                if(adapter->vlgrp && (status & IXGB_RX_DESC_STATUS_VP)) {
                        vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
                                le16_to_cpu(rx_desc->special) &
                                        IXGB_RX_DESC_SPECIAL_VLAN_MASK);
                } else {
                        netif_receive_skb(skb);
                }
#else /* CONFIG_IXGB_NAPI */
                if(adapter->vlgrp && (status & IXGB_RX_DESC_STATUS_VP)) {
                        vlan_hwaccel_rx(skb, adapter->vlgrp,
                                le16_to_cpu(rx_desc->special) &
                                        IXGB_RX_DESC_SPECIAL_VLAN_MASK);
                } else {
                        netif_rx(skb);  /* the non-NAPI receive entry point */
                }
#endif /* CONFIG_IXGB_NAPI */
                netdev->last_rx = jiffies;

rxdesc_done:
                /* clean up descriptor, might be written over by hw */
                rx_desc->status = 0;

                /* use prefetched values */
                rx_desc = next_rxd;             /* advance to the next descriptor */
                buffer_info = next_buffer;      /* ... and the next buffer */
        }

        rx_ring->next_to_clean = i;     /* record where cleaning stopped */

        ixgb_alloc_rx_buffers(adapter); /* replenish exactly as many skbs as were consumed above */

        return cleaned;
}
The listing above is best read through its comments. Next comes netif_rx(skb); again, code first, annotated:
int netif_rx(struct sk_buff *skb)
{
        struct softnet_data *queue;
        unsigned long flags;

        /* if netpoll wants it, pretend we never saw it */
        if (netpoll_rx(skb))    /* a quick look shows netpoll only intercepts ARP and UDP packets; the why is left for a later analysis */
                return NET_RX_DROP;

        if (!skb->tstamp.off_sec)
                net_timestamp(skb);

        /*
         * The code is rearranged so that the path is the most
         * short when CPU is congested, but is still operating.
         */
        local_irq_save(flags);
        queue = &__get_cpu_var(softnet_data);   /* each CPU has its own softirq receive queue */

        __get_cpu_var(netdev_rx_stat).total++;
        if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {        /* backlog limit on the queue; defaults to 300 and is tunable via /proc */
                if (queue->input_pkt_queue.qlen) {
enqueue:
                        dev_hold(skb->dev);
                        /* Just chain the skb onto input_pkt_queue and return:
                         * hard-IRQ context must stay short. The RX softirq
                         * will later dequeue it and hand it to the protocol
                         * stack. */
                        __skb_queue_tail(&queue->input_pkt_queue, skb);

                        local_irq_restore(flags);
                        return NET_RX_SUCCESS;
                }

                /* Hook backlog_dev onto this CPU's poll list, give it its
                 * quota, and raise the receive softirq. Note this runs only
                 * when input_pkt_queue is empty (qlen == 0). */
                netif_rx_schedule(&queue->backlog_dev);
                goto enqueue;
        }

        __get_cpu_var(netdev_rx_stat).dropped++;
        local_irq_restore(flags);

        /* Queue over the backlog limit: packets are arriving faster than
         * they can be processed, so drop right here. */
        kfree_skb(skb);
        return NET_RX_DROP;
}
With this function, the receive interrupt handling is complete. To summarize the interrupt handler's job: on each receive interrupt, move every completed packet out of DMA memory onto the per-CPU receive queue; if that queue was empty, wake the receive softirq to do the actual receiving; and if packets arrive faster than they can be processed, drop them right here. Now for the receive softirq. It is registered in net_dev_init(), which also sets up the per-CPU queues and the backlog device:
/*
 * Initialize the DEV module. At boot time this walks the device list and
 * unhooks any devices that fail to initialise (normally hardware not
 * present) and leaves us with a valid list of present and active devices.
 *
 */
static int __init net_dev_init(void)
{
        int i, rc = -ENOMEM;

        BUG_ON(!dev_boot_phase);

        if (dev_proc_init())
                goto out;

        if (netdev_sysfs_init())
                goto out;

        INIT_LIST_HEAD(&ptype_all);
        for (i = 0; i < 16; i++)
                INIT_LIST_HEAD(&ptype_base[i]);

        for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
                INIT_HLIST_HEAD(&dev_name_head[i]);

        for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
                INIT_HLIST_HEAD(&dev_index_head[i]);

        /*
         * Initialise the packet receive queues.
         */

        for_each_possible_cpu(i) {
                struct softnet_data *queue;

                queue = &per_cpu(softnet_data, i);
                skb_queue_head_init(&queue->input_pkt_queue);   /* non-NAPI input queue */
                queue->completion_queue = NULL;
                INIT_LIST_HEAD(&queue->poll_list);              /* NAPI poll list */
                set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
                queue->backlog_dev.weight = weight_p;           /* non-NAPI: default weight (quota unit) is 64 */
                queue->backlog_dev.poll = process_backlog;      /* non-NAPI poll handler */
                atomic_set(&queue->backlog_dev.refcnt, 1);
        }

        netdev_dma_register();

        dev_boot_phase = 0;

        open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
        open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);

        hotcpu_notifier(dev_cpu_callback, 0);
        dst_init();
        dev_mcast_init();
        rc = 0;
out:
        return rc;
}
Self-explanatory. On to net_rx_action:
/*
 * Each run of this softirq gets a budget for the whole per-CPU poll list,
 * while every device has its own quota. One softirq run works through the
 * budget and, if it runs out, simply raises the softirq again. All the
 * devices hooked onto the poll list consume the budget in quota-sized bites:
 * when a device has eaten its quota it is moved to the tail of the list so
 * the other devices get their turn.
 */
static void net_rx_action(struct softirq_action *h)
{
        struct softnet_data *queue = &__get_cpu_var(softnet_data);
        unsigned long start_time = jiffies;
        int budget = netdev_budget;     /* the budget for one softirq run */
        void *have;

        local_irq_disable();

        while (!list_empty(&queue->poll_list)) {
                struct net_device *dev;

                /* budget exhausted, or we have been at it for more than one
                 * jiffy: bail out and raise the softirq again */
                if (budget <= 0 || jiffies - start_time > 1)
                        goto softnet_break;

                local_irq_enable();

                /* take the first device on the poll list */
                dev = list_entry(queue->poll_list.next,
                                 struct net_device, poll_list);
                have = netpoll_poll_lock(dev);

                if (dev->quota <= 0 || dev->poll(dev, &budget)) {
                        /*
                         * Quota used up, or the device still has packets
                         * pending.
                         */
                        netpoll_poll_unlock(have);
                        local_irq_disable();
                        /* unhook the device and re-add it at the tail of the
                         * poll list */
                        list_move_tail(&dev->poll_list, &queue->poll_list);

                        /* refill the quota, then keep polling for the
                         * remaining packets on a later pass */
                        if (dev->quota < 0)
                                dev->quota += dev->weight;
                        else
                                dev->quota = dev->weight;
                } else {
                        /* Fewer packets received than the quota: this round
                         * of polling is done, and the poll handler has
                         * re-enabled the receive interrupt to wait for new
                         * packets. */
                        netpoll_poll_unlock(have);
                        dev_put(dev);
                        local_irq_disable();
                }
        }
out:
#ifdef CONFIG_NET_DMA
        /*
         * There may not be any more sk_buffs coming right now, so push
         * any pending DMA copies to hardware
         */
        if (net_dma_client) {
                struct dma_chan *chan;
                rcu_read_lock();
                list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
                        dma_async_memcpy_issue_pending(chan);
                rcu_read_unlock();
        }
#endif
        local_irq_enable();
        return;

softnet_break:
        __get_cpu_var(netdev_rx_stat).time_squeeze++;
        __raise_softirq_irqoff(NET_RX_SOFTIRQ);
        goto out;
}
This is the softirq handler, and NAPI mode goes through exactly the same flow. The difference in non-NAPI mode is that the struct net_device *dev taken off the poll list is the default backlog_dev initialized above, so dev->poll(dev, &budget) invokes the handler registered for it in net_dev_init(): queue->backlog_dev.poll = process_backlog. In NAPI mode the device is the driver's own and the poll handler is registered by the driver itself (a sketch of such a handler follows the listing below). So all that remains is process_backlog:
static int process_backlog(struct net_device *backlog_dev, int *budget)
{
        int work = 0;
        int quota = min(backlog_dev->quota, *budget);
        struct softnet_data *queue = &__get_cpu_var(softnet_data);
        unsigned long start_time = jiffies;

        backlog_dev->weight = weight_p;
        for (;;) {
                struct sk_buff *skb;
                struct net_device *dev;

                local_irq_disable();
                skb = __skb_dequeue(&queue->input_pkt_queue);
                if (!skb)
                        goto job_done;
                local_irq_enable();

                dev = skb->dev;

                netif_receive_skb(skb);

                dev_put(dev);

                work++;

                if (work >= quota || jiffies - start_time > 1)
                        break;
        }

        backlog_dev->quota -= work;
        *budget -= work;
        return -1;

job_done:
        backlog_dev->quota -= work;
        *budget -= work;

        list_del(&backlog_dev->poll_list);
        smp_mb__before_clear_bit();
        netif_poll_enable(backlog_dev);

        local_irq_enable();
        return 0;
}
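For contrast, a NAPI driver of this era registers its own poll handler instead of leaning on backlog_dev. A minimal sketch: the demo_* names are hypothetical, while the netdev->poll/quota/weight interface is the real old-style one.

static int demo_poll(struct net_device *netdev, int *budget)
{
        struct demo_adapter *adapter = netdev_priv(netdev);
        int work_to_do = min(netdev->quota, *budget);
        int work_done = 0;

        /* clean the RX ring directly, bounded by quota and budget */
        demo_clean_rx_irq(adapter, &work_done, work_to_do);

        *budget -= work_done;
        netdev->quota -= work_done;

        if (work_done < work_to_do) {
                /* ring drained: leave polling mode and re-enable the NIC's
                 * receive interrupt */
                netif_rx_complete(netdev);
                demo_irq_enable(adapter);
                return 0;       /* tells net_rx_action we are done */
        }
        return 1;               /* more work pending: stay on the poll list */
}

At probe time the driver wires it up with netdev->poll = demo_poll; netdev->weight = 64; and its interrupt handler calls netif_rx_schedule(netdev) instead of netif_rx().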
To recap: process_backlog pulls packets off the receive queue with skb = __skb_dequeue(&queue->input_pkt_queue), hands them up to the protocol stack via netif_receive_skb(skb), and charges the quota and the budget with backlog_dev->quota -= work and *budget -= work.
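One loose end: where netif_receive_skb() sends the packet next. It matches skb->protocol against the handlers hashed into ptype_base[] (the array initialized in net_dev_init() above). A protocol registers itself roughly the way IPv4 does in net/ipv4/af_inet.c (abridged):

static struct packet_type ip_packet_type = {
        .type = __constant_htons(ETH_P_IP),
        .func = ip_rcv,
};

/* called from inet_init(): chain the handler into ptype_base[] */
dev_add_pack(&ip_packet_type);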
over ...