The e100 NIC driver
In the module's initialization function, pci_register_driver(&e100_driver) is called to register the e100 driver with the kernel:
static struct pci_driver e100_driver = {
    .name = DRV_NAME,
    .id_table = e100_id_table,
    .probe = e100_probe,
    .remove = __devexit_p(e100_remove),
#ifdef CONFIG_PM
    /* Power Management hooks */
    .suspend = e100_suspend,
    .resume = e100_resume,
#endif
    .shutdown = e100_shutdown,
    .err_handler = &e100_err_handler,
};
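For context, here is a minimal sketch of the module entry and exit points that perform this registration. The function names follow the usual e100_init_module/e100_cleanup_module convention but should be treated as assumptions; the real e100.c does a bit more (e.g. printing a version banner).

#include <linux/module.h>
#include <linux/pci.h>

/* Sketch only: register the pci_driver at module load, unregister at unload. */
static int __init e100_init_module(void)
{
    return pci_register_driver(&e100_driver);
}

static void __exit e100_cleanup_module(void)
{
    pci_unregister_driver(&e100_driver);
}

module_init(e100_init_module);
module_exit(e100_cleanup_module);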
When the kernel detects the NIC (i.e. its PCI vendor/device IDs match an entry in e100_id_table), it calls e100_probe(). The probe function allocates the net_device structure together with the e100-private nic structure, initializes them, and registers the net_device via register_netdev(). The flow is roughly:
// allocate the net_device plus the private nic structure in one go
netdev = alloc_etherdev(sizeof(struct nic));
// set up the net_device function pointers
netdev->open = e100_open;
netdev->stop = e100_close;
netdev->hard_start_xmit = e100_xmit_frame;
netdev->set_multicast_list = e100_set_multicast_list;
netdev->set_mac_address = e100_set_mac_address;
netdev->change_mtu = e100_change_mtu;
netdev->do_ioctl = e100_do_ioctl;
SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops);
netdev->tx_timeout = e100_tx_timeout;
netdev->watchdog_timeo = E100_WATCHDOG_PERIOD;
// NAPI support: initialize the napi_struct embedded in nic and hook it onto the net_device's napi_list
nic = netdev_priv(netdev);
netif_napi_add(netdev, &nic->napi, e100_poll, E100_NAPI_WEIGHT);
// initialize the private nic structure (timers and deferred work)
init_timer(&nic->watchdog);
nic->watchdog.function = e100_watchdog;
nic->watchdog.data = (unsigned long)nic;
init_timer(&nic->blink_timer);
nic->blink_timer.function = e100_blink_led;
nic->blink_timer.data = (unsigned long)nic;

INIT_WORK(&nic->tx_timeout_task, e100_tx_timeout_task);
// register the net_device with the kernel
register_netdev(netdev);
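The ID matching mentioned above is driven by e100_id_table. As a hedged illustration of what such a table looks like (the real table in e100.c lists many 8255x device IDs; 0x1229, the 82557/8/9, is one of them):

#include <linux/pci.h>

/* Illustrative only: pci_register_driver() compares the vendor/device IDs
 * read from each card's PCI config space against this table and calls
 * .probe (e100_probe) on a match. */
static const struct pci_device_id example_e100_ids[] = {
    { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x1229) },    /* 82557/8/9 */
    { 0, }                                          /* terminator */
};
MODULE_DEVICE_TABLE(pci, example_e100_ids);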
When the interface is brought up (e.g. ifconfig eth0 up), e100_open() is called:
// allocate and initialize the rx receive list (RFDs)
if ((err = e100_rx_alloc_list(nic)))
    return err;
// allocate and initialize the cb (command block) list; transmission goes through these
if ((err = e100_alloc_cbs(nic)))
    goto err_rx_clean_list;
if ((err = e100_hw_init(nic)))
    goto err_clean_cbs;
e100_set_multicast_list(nic->netdev);
e100_start_receiver(nic, NULL);
mod_timer(&nic->watchdog, jiffies);
if ((err = request_irq(nic->pdev->irq, e100_intr, IRQF_SHARED,
        nic->netdev->name, nic->netdev)))
    goto err_no_irq;
netif_wake_queue(nic->netdev);
napi_enable(&nic->napi);
Transmitting data
static int e100_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
    err = e100_exec_cb(nic, skb, e100_xmit_prepare);

    switch (err) {
    case -ENOSPC:
        /* We queued the skb, but now we're out of space. */
        DPRINTK(TX_ERR, DEBUG, "No space for CB\n");
        netif_stop_queue(netdev);
        break;
    case -ENOMEM:
        /* This is a hard error - log it. */
        DPRINTK(TX_ERR, DEBUG, "Out of Tx resources, returning skb\n");
        netif_stop_queue(netdev);
        return 1;
    }

    netdev->trans_start = jiffies;
    return 0;
}
The frame is handed to the hardware through e100_exec_cb(), and transmit errors are handled here: in either error case the net_device's transmit queue is stopped (see netif_stop_queue), and for -ENOMEM the function additionally returns 1 (NETDEV_TX_BUSY), telling the core that the skb was not consumed and should be requeued.
e100_xmit_prepare() only fills in the cb (command block); it is e100_exec_cb() that actually hands it to the hardware.
static void e100_xmit_prepare(struct nic *nic, struct cb *cb,
    struct sk_buff *skb)
{
    cb->command = nic->tx_command;
    /* interrupt every 16 packets regardless of delay */
    if ((nic->cbs_avail & ~15) == nic->cbs_avail)
        cb->command |= cpu_to_le16(cb_i);
    cb->u.tcb.tbd_array = cb->dma_addr + offsetof(struct cb, u.tcb.tbd);
    cb->u.tcb.tcb_byte_count = 0;
    cb->u.tcb.threshold = nic->tx_threshold;
    cb->u.tcb.tbd_count = 1;
    cb->u.tcb.tbd.buf_addr = cpu_to_le32(pci_map_single(nic->pdev,
        skb->data, skb->len, PCI_DMA_TODEVICE));
    /* check for mapping failure? */
    cb->u.tcb.tbd.size = cpu_to_le16(skb->len);
}
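A side note on the "check for mapping failure?" comment: pci_map_single() can fail, and a robust driver would check the returned handle before giving it to the controller. A hedged fragment of what such a check might look like (this is not the actual e100 code, and pci_dma_mapping_error()'s signature differs across kernel versions: older kernels take only the dma_addr_t, newer ones also take the pci_dev):

dma_addr_t buf_dma;

buf_dma = pci_map_single(nic->pdev, skb->data, skb->len, PCI_DMA_TODEVICE);
if (pci_dma_mapping_error(buf_dma)) {
    /* mapping failed: don't hand a bogus bus address to the controller;
     * drop the frame (e100_xmit_prepare returns void, so the error cannot
     * easily be propagated from here) */
    dev_kfree_skb_any(skb);
    return;
}
cb->u.tcb.tbd.buf_addr = cpu_to_le32(buf_dma);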
e100_exec_cb() then works through the cb list hanging off the nic structure:
static int e100_exec_cb(struct nic *nic, struct sk_buff *skb,
    void (*cb_prepare)(struct nic *, struct cb *, struct sk_buff *))
{
    // take a free cb from the ring
    cb = nic->cb_to_use;
    nic->cb_to_use = cb->next;
    nic->cbs_avail--;
    cb->skb = skb;
    // call e100_xmit_prepare (or another callback) to fill in the cb
    cb_prepare(nic, cb, skb);
    // push the pending cbs on the list to the controller
    while (nic->cb_to_send != nic->cb_to_use) {
        if (unlikely(e100_exec_cmd(nic, nic->cuc_cmd,
            nic->cb_to_send->dma_addr))) {
            /* Ok, here's where things get sticky. It's
             * possible that we can't schedule the command
             * because the controller is too busy, so
             * let's just queue the command and try again
             * when another command is scheduled. */
            if (err == -ENOSPC) {
                // request a reset
                schedule_work(&nic->tx_timeout_task);
            }
            break;
        } else {
            nic->cuc_cmd = cuc_resume;
            nic->cb_to_send = nic->cb_to_send->next;
        }
    }
e100_exec_cmd() is where the actual hardware operation happens. This is where the hardware-specific details start: which commands does the controller understand, and what is DMA doing here?
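A hedged reconstruction, based on the 8255x programming model rather than a verbatim quote of e100.c: the cb list lives in memory the controller can reach by DMA. e100_exec_cmd() writes the bus address of a cb into the controller's SCB (System Control Block) "general pointer" register and then writes a command unit (CU) or receive unit (RU) opcode, such as cuc_start, cuc_resume or ruc_start, into the SCB command register. The controller then fetches the cb by DMA and, for a transmit cb, follows tbd.buf_addr to DMA the packet data that pci_map_single() mapped earlier. Roughly:

/* Hedged sketch of the SCB command handshake; the register/field names
 * follow e100.c (nic->csr->scb), but locking and timeout handling are
 * omitted, so check the real e100_exec_cmd() for the details. */
static int sketch_exec_cmd(struct nic *nic, u8 cmd, dma_addr_t dma_addr)
{
    /* wait until the previous command has been accepted
     * (the command byte reads back as zero) */
    while (ioread8(&nic->csr->scb.cmd_lo))
        cpu_relax();

    /* cuc_resume continues with the cb list already loaded; other
     * commands need the bus address of the cb/RFD they start from */
    if (cmd != cuc_resume)
        iowrite32(dma_addr, &nic->csr->scb.gen_ptr);
    iowrite8(cmd, &nic->csr->scb.cmd_lo);

    return 0;
}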
Receiving data
The registered interrupt handler, e100_intr(), is a classic interrupt service routine:
static irqreturn_t e100_intr(int irq, void *dev_id)
{
    struct net_device *netdev = dev_id;
    struct nic *nic = netdev_priv(netdev);
    // read the status/ack register to tell whether this interrupt came from
    // our device or from another device sharing the line
    u8 stat_ack = ioread8(&nic->csr->scb.stat_ack);

    DPRINTK(INTR, DEBUG, "stat_ack = 0x%02X\n", stat_ack);

    if (stat_ack == stat_ack_not_ours ||    /* Not our interrupt */
        stat_ack == stat_ack_not_present)   /* Hardware is ejected */
        return IRQ_NONE;

    /* Ack interrupt(s) */
    iowrite8(stat_ack, &nic->csr->scb.stat_ack);

    /* We hit Receive No Resource (RNR); restart RU after cleaning */
    if (stat_ack & stat_ack_rnr)
        nic->ru_running = RU_SUSPENDED;

    // disable the NIC's interrupts and schedule our poll method onto this
    // CPU's softnet_data poll_list
    if (likely(netif_rx_schedule_prep(netdev, &nic->napi))) {
        e100_disable_irq(nic);
        __netif_rx_schedule(netdev, &nic->napi);
    }

    return IRQ_HANDLED;
}
Reception uses NAPI. netif_rx_schedule() eventually calls __napi_schedule(), which puts the device's napi_struct, and with it the device's poll method, on the per-CPU poll_list:
void __napi_schedule(struct napi_struct *n)
{
    unsigned long flags;

    local_irq_save(flags);
    list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
    __raise_softirq_irqoff(NET_RX_SOFTIRQ);
    local_irq_restore(flags);
}
The device's poll method:
static int e100_poll(struct napi_struct *napi, int budget)
{
    struct nic *nic = container_of(napi, struct nic, napi);
    struct net_device *netdev = nic->netdev;
    unsigned int work_done = 0;

    e100_rx_clean(nic, &work_done, budget);
    e100_tx_clean(nic);

    /* If budget not fully consumed, exit the polling mode */
    if (work_done < budget) {
        netif_rx_complete(netdev, napi);
        e100_enable_irq(nic);
    }

    return work_done;
}
e100_rx_clean() ultimately calls netif_receive_skb() to hand each packet up the stack. The hardware specifics are still not entirely clear to me. :(
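To make the rx side a bit more concrete, here is a hedged sketch of the per-buffer receive processing (modeled on e100_rx_indicate() from this era's e100.c; the field names and constants are assumptions, not a verbatim quote):

/* Sketch: each rx buffer starts with an RFD that the controller fills in by
 * DMA.  Check its status, unmap the buffer, trim the skb to the received
 * length, and push it up the stack. */
static int sketch_rx_indicate(struct nic *nic, struct rx *rx,
    unsigned int *work_done)
{
    struct sk_buff *skb = rx->skb;
    struct rfd *rfd = (struct rfd *)skb->data;
    u16 rfd_status, actual_size;

    /* sync just the RFD header so the CPU sees what the device wrote */
    pci_dma_sync_single_for_cpu(nic->pdev, rx->dma_addr,
        sizeof(struct rfd), PCI_DMA_FROMDEVICE);
    rfd_status = le16_to_cpu(rfd->status);
    if (!(rfd_status & cb_complete))
        return -ENODATA;    /* the device has not filled this one yet */

    actual_size = le16_to_cpu(rfd->actual_size) & 0x3FFF;

    /* give the buffer back to the CPU and strip the RFD header */
    pci_unmap_single(nic->pdev, rx->dma_addr,
        RFD_BUF_LEN, PCI_DMA_FROMDEVICE);
    skb_reserve(skb, sizeof(struct rfd));
    skb_put(skb, actual_size);
    skb->protocol = eth_type_trans(skb, nic->netdev);

    netif_receive_skb(skb);    /* hand the packet to the upper layers */
    if (work_done)
        (*work_done)++;
    rx->skb = NULL;
    return 0;
}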
How the kernel receives packets
There are two modes: the plain interrupt model (netif_rx) and NAPI.
First, the core data structure:
/*
 * Incoming packets are placed on per-cpu queues so that
 * no locking is needed.
 */
struct softnet_data
{
    struct Qdisc *output_queue;
    struct sk_buff_head input_pkt_queue;
    struct list_head poll_list;
    struct sk_buff *completion_queue;

    struct napi_struct backlog;
#ifdef CONFIG_NET_DMA
    struct dma_chan *net_dma;
#endif
};
As the comment says, there is one of these per CPU, and code that touches it disables local interrupts first, so no lock is needed.
In the plain interrupt model, the driver's interrupt handler calls netif_rx(), which appends the skb to input_pkt_queue and, if the backlog napi_struct is not already on poll_list, adds it; a condensed sketch follows.
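A hedged sketch of that non-NAPI path (simplified from 2.6.2x net/core/dev.c; statistics and some corner cases are omitted):

/* Sketch of netif_rx(): queue the skb on this CPU's backlog and make sure
 * the backlog's poll entry is scheduled. */
int sketch_netif_rx(struct sk_buff *skb)
{
    struct softnet_data *queue;
    unsigned long flags;

    local_irq_save(flags);
    queue = &__get_cpu_var(softnet_data);

    if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
        if (!queue->input_pkt_queue.qlen)
            /* the backlog was idle: put it on poll_list and
             * raise NET_RX_SOFTIRQ */
            napi_schedule(&queue->backlog);
        __skb_queue_tail(&queue->input_pkt_queue, skb);
        local_irq_restore(flags);
        return NET_RX_SUCCESS;
    }

    /* backlog full: drop the packet */
    local_irq_restore(flags);
    kfree_skb(skb);
    return NET_RX_DROP;
}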
Under NAPI, the interrupt handler instead calls netif_rx_schedule(), which only decides whether the device's own poll method needs to be put on poll_list.
Once something is on poll_list, NET_RX_SOFTIRQ is raised and the net_rx_action() softirq handler takes over.
net_rx_action() walks the poll_list and invokes each entry's poll method: for non-NAPI devices that is the generic process_backlog(), which drains input_pkt_queue and passes the packets upward, while NAPI devices run their own poll. The loop also enforces a budget and a time limit on how much work one softirq run may do; a sketch follows.
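A hedged sketch of net_rx_action() (simplified from 2.6.2x net/core/dev.c; netpoll locking and statistics are omitted):

/* Sketch: give every scheduled napi_struct a share of the budget; a poll()
 * that uses its full weight stays on the list and is rotated to the tail,
 * otherwise it is expected to have called netif_rx_complete()/napi_complete()
 * and removed itself. */
static void sketch_net_rx_action(void)
{
    struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
    unsigned long start_time = jiffies;
    int budget = netdev_budget;

    local_irq_disable();
    while (!list_empty(list)) {
        struct napi_struct *n;
        int work, weight;

        if (budget <= 0 || jiffies != start_time) {
            /* out of budget or time: re-raise the softirq and
             * finish the rest later */
            __raise_softirq_irqoff(NET_RX_SOFTIRQ);
            break;
        }

        local_irq_enable();
        n = list_entry(list->next, struct napi_struct, poll_list);
        weight = n->weight;
        work = n->poll(n, weight);    /* e100_poll or process_backlog */
        budget -= work;
        local_irq_disable();

        if (work == weight)
            list_move_tail(&n->poll_list, list);
    }
    local_irq_enable();
}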
process_backlog() feeds packets into netif_receive_skb(), which handles bridging and packet taps (e.g. sniffers) and finally hands IP packets to ip_rcv().
Taking e100_poll() as the example again: it walks the rx receive list, unmaps each DMA buffer, and passes the data to netif_receive_skb(). As for where the device's interrupt gets disabled while its NAPI poll runs: the driver does that itself in its interrupt handler. e100_intr() calls e100_disable_irq() before scheduling the poll, and e100_poll() re-enables it via e100_enable_irq() once it has caught up.
Transmitting data, from the kernel's side:
There are two entry points:
1. dev_queue_xmit(), which goes through the kernel's traffic-control subsystem (qdisc_run) and finally calls the driver's hard_start_xmit;
2. calling hard_start_xmit() directly.
What net_tx_action() does:
1. free the skbs sitting on the completion queue; 2. call qdisc_run() on the qdiscs queued on output_queue so their pending packets get sent. A sketch follows.
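A hedged sketch of net_tx_action() (simplified from 2.6.2x net/core/dev.c; the trylock/reschedule dance on the qdisc root lock is omitted, and in older kernels output_queue holds net_device pointers and qdisc_run() takes the device rather than the Qdisc):

/* Sketch: free skbs that drivers handed to dev_kfree_skb_irq(), then run
 * the qdiscs that were queued on output_queue for transmission. */
static void sketch_net_tx_action(void)
{
    struct softnet_data *sd = &__get_cpu_var(softnet_data);

    if (sd->completion_queue) {
        struct sk_buff *clist;

        local_irq_disable();
        clist = sd->completion_queue;
        sd->completion_queue = NULL;
        local_irq_enable();

        while (clist) {
            struct sk_buff *skb = clist;
            clist = clist->next;
            __kfree_skb(skb);
        }
    }

    if (sd->output_queue) {
        struct Qdisc *head;

        local_irq_disable();
        head = sd->output_queue;
        sd->output_queue = NULL;
        local_irq_enable();

        while (head) {
            struct Qdisc *q = head;

            head = head->next_sched;
            /* drain this qdisc: qdisc_run() dequeues packets and calls
             * the driver's hard_start_xmit for each one */
            qdisc_run(q);
        }
    }
}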