Chinaunix首页 | 论坛 | 博客
  • 博客访问: 3448423
  • 博文数量: 198
  • 博客积分: 0
  • 博客等级: 民兵
  • 技术积分: 7256
  • 用 户 组: 普通用户
  • 注册时间: 2013-01-23 18:56
个人简介

将晦涩难懂的技术讲的通俗易懂

文章分类

全部博文(198)

文章存档

2024年(1)

2023年(9)

2022年(4)

2021年(12)

2020年(8)

2019年(18)

2018年(19)

2017年(9)

2016年(26)

2015年(18)

2014年(54)

2013年(20)

分类: LINUX

2019-02-24 18:14:07

dpdk net_virtio前端驱动实现分析

——lvyilong316

与kernel中的vhost-net相对应,net_virtio是dpdk中实现的网络virtio前端。相对于kernel,dpdk的net_virtio实现要简单很多,而且条理更加清晰。所以当我们想了解前端是如何工作的时候,分析dpdk net_virtio网络前端是一个有效的方法。下面以dpdk 18.11为例分析一下net_virtio前端的大体实现流程。

和传统驱动一样,首先要从probe函数开始,net_virtio的probe函数为eth_virtio_pci_probe,通过以下结构体在INIT时注册:

点击(此处)折叠或打开

  1. static struct rte_pci_driver rte_virtio_pmd = {
  2.     .driver = {
  3.         .name = "net_virtio",
  4.     },
  5.     .id_table = pci_id_virtio_map,
  6.     .drv_flags = 0,
  7.     .probe = eth_virtio_pci_probe,
  8.     .remove = eth_virtio_pci_remove,
  9. };

  10. RTE_INIT(rte_virtio_pmd_init)
  11. {
  12.     rte_eal_iopl_init();
  13.     rte_pci_register(&rte_virtio_pmd);
  14. }

在分析eth_virtio_pci_probe函数前,我们先看pci_id_virtio_map

点击(此处)折叠或打开

  1. /* VirtIO PCI vendor/device ID. */
  2. #define VIRTIO_PCI_VENDORID 0x1AF4
  3. #define VIRTIO_PCI_LEGACY_DEVICEID_NET 0x1000
  4. #define VIRTIO_PCI_MODERN_DEVICEID_NET 0x1041

  5. /*
  6.  * The set of PCI devices this driver supports
  7.  */
  8. static const struct rte_pci_id pci_id_virtio_map[] = {
  9.     { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_LEGACY_DEVICEID_NET) },
  10.     { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_MODERN_DEVICEID_NET) },
  11.     { .vendor_id = 0, /* sentinel */ },
  12. };

可以看到前端驱动支持两种类型的virtio网络设备,一种是legacy的一种是modern的。所以之后的处理逻辑中我们将会看到有很多地方是需要区别处理的。

 

•  eth_virtio_pci_probe

点击(此处)折叠或打开

  1. static int eth_virtio_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
  2.     struct rte_pci_device *pci_dev)
  3. {
  4.     if (rte_eal_iopl_init() != 0) { /*通过iopl函数为调用进程设置I/O端口访问权限,只在i386平台需要*/
  5.         PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD");
  6.         return 1;
  7.     }

  8.     /* virtio pmd skips probe if device needs to work in vdpa mode */
  9.     /* 如果指定了vdpa模式就跳过后续的probe流程 */
  10.     if (vdpa_mode_selected(pci_dev->device.devargs))
  11.         return 1;

  12.     return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct virtio_hw),
  13.         eth_virtio_dev_init);
  14. }

•  rte_eth_dev_pci_generic_probe

点击(此处)折叠或打开

  1. static inline int
  2. rte_eth_dev_pci_generic_probe(struct rte_pci_device *pci_dev,
  3.     size_t private_data_size, eth_dev_pci_callback_t dev_init)
  4. {
  5.     struct rte_eth_dev *eth_dev;
  6.     int ret;
  7.     /* private_data_size = sizeof(struct virtio_hw) */
  8.     /* 从全局rte_eth_devices[]数组中分配struct rte_eth_dev结构,确定port_id,
  9.      * 分配私有数据eth_dev->data->dev_private指向struct virtio_hw */
  10.     eth_dev = rte_eth_dev_pci_allocate(pci_dev, private_data_size);
  11.     if (!eth_dev)
  12.         return -ENOMEM;

  13.     RTE_FUNC_PTR_OR_ERR_RET(*dev_init, -EINVAL);
  14.     ret = dev_init(eth_dev); /* eth_virtio_dev_init */
  15.     if (ret)
  16.         rte_eth_dev_pci_release(eth_dev);
  17.     else
  18.         rte_eth_dev_probing_finish(eth_dev);

  19.     return ret;
  20. }

其中首先调用rte_eth_dev_pci_allocate,该函数从全局rte_eth_devices[]数组中分配struct rte_eth_dev结构,确定port_id,同时分配私有数据,使eth_dev->data->dev_private指向struct virtio_hw;然后调用dev_init,也就是参数传入的eth_virtio_dev_init函数,我们后面重点分析;最后调用rte_eth_dev_probing_finish触发RTE_ETH_EVENT_NEW事件,并将设备状态设置为RTE_ETH_DEV_ATTACHED。相关数据结构关系如下图:

•  rte_eth_dev_probing_finish

点击(此处)折叠或打开

  1. void
  2. rte_eth_dev_probing_finish(struct rte_eth_dev *dev)
  3. {
  4.     if (dev == NULL)
  5.         return;

  6.     _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_NEW, NULL);

  7.     dev->state = RTE_ETH_DEV_ATTACHED;
  8. }

下面重点看下net-virtio设备的核心初始化函数eth_virtio_dev_init

•  eth_virtio_dev_init

点击(此处)折叠或打开

  1. int
  2. eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
  3. {
  4.     struct virtio_hw *hw = eth_dev->data->dev_private;
  5.     int ret;

  6.     RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf));

  7.     eth_dev->dev_ops = &virtio_eth_dev_ops; /* 初始化virtio的设备处理函数 */

  8.     if (rte_eal_process_type() == RTE_PROC_SECONDARY) { /* 如果进程是SECONDARY的处理逻辑 */
  9.        /....../
  10.         return 0;
  11.     }

  12.     /* Allocate memory for storing MAC addresses */
  13.     eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0);
  14.     if (eth_dev->data->mac_addrs == NULL) {
  15.         return -ENOMEM;
  16.     }

  17.     hw->port_id = eth_dev->data->port_id;
  18.     /* For virtio_user case the hw->virtio_user_dev is populated by
  19.      * virtio_user_eth_dev_alloc() before eth_virtio_dev_init() is called.
  20.      */
  21.      /* 如果不是virtio_user设备 */
  22.     if (!hw->virtio_user_dev) {
  23.         ret = vtpci_init(RTE_ETH_DEV_TO_PCI(eth_dev), hw);/* 设备的pci信息初始化*/
  24.         if (ret)
  25.             goto out;
  26.     }

  27.     /* reset device and negotiate default features */
  28.     /* 完成前后端协商,确定feature的支持 */
  29.     ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
  30.     if (ret < 0)
  31.         goto out;

  32.     return 0;

  33. out:
  34.     rte_free(eth_dev->data->mac_addrs);
  35.     return ret;
  36. }

    其中关键是调用了两个函数,一个是vtpci_init,该函数读取设备的PCI配置空间进行初始化,同时判断设备是modern还是legacy;另一个是virtio_init_device,该函数主要完成前后端协商,进一步初始化设备,如确定设备支持的feature等。下面首先分析vtpci_init

•  vtpci_init

点击(此处)折叠或打开

  1. int
  2. vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
  3. {
  4.     /*
  5.      * Try if we can succeed reading virtio pci caps, which exists
  6.      * only on modern pci device. If failed, we fallback to legacy
  7.      * virtio handling.
  8.      */
  9.     if (virtio_read_caps(dev, hw) == 0) {
  10.         PMD_INIT_LOG(INFO, "modern virtio pci detected.");
  11.         virtio_hw_internal[hw->port_id].vtpci_ops = &modern_ops;
  12.         hw->modern = 1;
  13.         return 0;
  14.     }

  15.     PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
  16.     /* 对于lagacy需要进行ioport mmap */
  17.     if (rte_pci_ioport_map(dev, 0, VTPCI_IO(hw)) < 0) {
  18.         return -1;
  19.     }

  20.     virtio_hw_internal[hw->port_id].vtpci_ops = &legacy_ops;
  21.     hw->modern = 0;

  22.     return 0;
  23. }

    其中主要通过virtio_read_caps读取PCI配置空间,初始化设备,然后根据PCI支持的能力确定设备是modern还是legacy,分别初始化对应的ops。我们看下virtio_read_caps中是根据什么判断设备是modern的。

•  virtio_read_caps

点击(此处)折叠或打开

  1. static int
  2. virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
  3. {
  4.     uint8_t pos;
  5.     struct virtio_pci_cap cap;
  6.     int ret;
  7.     /* 通过UIO或VFIO进行mmap */
  8.     if (rte_pci_map_device(dev)) {
  9.         PMD_INIT_LOG(DEBUG, "failed to map pci device!");
  10.         return -1;
  11.     }
  12.     /* 读取PCI设备capability list */
  13.     ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
  14.     if (ret != 1) {
  15.         PMD_INIT_LOG(DEBUG,
  16.              "failed to read pci capability list, ret %d", ret);
  17.         return -1;
  18.     }
  19.     /* 读取PCI设备配置空间capability,初始化hw中的字段,如是否支持MSIX等 */
  20.     while (pos) {
  21.         ret = rte_pci_read_config(dev, &cap, 2, pos);
  22.         if (ret != 2) {
  23.             PMD_INIT_LOG(DEBUG,
  24.                  "failed to read pci cap at pos: %x ret %d",
  25.                  pos, ret);
  26.             break;
  27.         }

  28.         if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
  29.             /* Transitional devices would also have this capability,
  30.              * that's why we also check if msix is enabled.
  31.              * 1st byte is cap ID; 2nd byte is the position of next
  32.              * cap; next two bytes are the flags.
  33.              */
  34.             uint16_t flags;

  35.             ret = rte_pci_read_config(dev, &flags, sizeof(flags),
  36.                     pos + 2);
  37.             if (ret != sizeof(flags)) {
  38.                 PMD_INIT_LOG(DEBUG,
  39.                      "failed to read pci cap at pos:"
  40.                      " %x ret %d", pos + 2, ret);
  41.                 break;
  42.             }

  43.             if (flags & PCI_MSIX_ENABLE)
  44.                 hw->use_msix = VIRTIO_MSIX_ENABLED;
  45.             else
  46.                 hw->use_msix = VIRTIO_MSIX_DISABLED;
  47.         }

  48.         if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
  49.             PMD_INIT_LOG(DEBUG,
  50.                 "[%2x] skipping non VNDR cap id: %02x",
  51.                 pos, cap.cap_vndr);
  52.             goto next;
  53.         }

  54.         ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
  55.         if (ret != sizeof(cap)) {
  56.             PMD_INIT_LOG(DEBUG,
  57.                  "failed to read pci cap at pos: %x ret %d",
  58.                  pos, ret);
  59.             break;
  60.         }

  61.         PMD_INIT_LOG(DEBUG,
  62.             "[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
  63.             pos, cap.cfg_type, cap.bar, cap.offset, cap.length);

  64.         switch (cap.cfg_type) {
  65.         case VIRTIO_PCI_CAP_COMMON_CFG:
  66.             hw->common_cfg = get_cfg_addr(dev, &cap);
  67.             break;
  68.         case VIRTIO_PCI_CAP_NOTIFY_CFG:
  69.             ret = rte_pci_read_config(dev,
  70.                     &hw->notify_off_multiplier,
  71.                     4, pos + sizeof(cap));
  72.             if (ret != 4)
  73.                 PMD_INIT_LOG(DEBUG,
  74.                     "failed to read notify_off_multiplier, ret %d",
  75.                     ret);
  76.             else
  77.                 hw->notify_base = get_cfg_addr(dev, &cap);
  78.             break;
  79.         case VIRTIO_PCI_CAP_DEVICE_CFG:
  80.             hw->dev_cfg = get_cfg_addr(dev, &cap);
  81.             break;
  82.         case VIRTIO_PCI_CAP_ISR_CFG:
  83.             hw->isr = get_cfg_addr(dev, &cap);
  84.             break;
  85.         }

  86. next:
  87.         pos = cap.cap_next;
  88.     }
  89.     /* 判断设备是否是modern的,注意只有这些配置都支持才能是modern */
  90.     if (hw->common_cfg == NULL || hw->notify_base == NULL ||
  91.      hw->dev_cfg == NULL || hw->isr == NULL) {
  92.         PMD_INIT_LOG(INFO, "no modern virtio pci device found.");
  93.         return -1;
  94.     }

  95.     PMD_INIT_LOG(INFO, "found modern virtio pci device.");

  96.     PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", hw->common_cfg);
  97.     PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", hw->dev_cfg);
  98.     PMD_INIT_LOG(DEBUG, "isr cfg mapped at: %p", hw->isr);
  99.     PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u",
  100.         hw->notify_base, hw->notify_off_multiplier);

  101.     return 0;
  102. }

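   可以看到modern设备需要同时满足四个条件:hw->common_cfg != NULL、hw->notify_base != NULL、hw->dev_cfg != NULL、hw->isr != NULL。也就是说,设备必须同时提供common、notify、device和isr这四种配置capability,只要其中任何一个为NULL,virtio_read_caps就返回-1,随后vtpci_init会回退到legacy方式处理。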

下面看前后端协商的初始化函数virtio_init_device。注意其第二个调用参数为VIRTIO_PMD_DEFAULT_GUEST_FEATURES ,这个是代码中定义的前端支持的全量feature

ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);

•  virtio_init_device

点击(此处)折叠或打开

  1. static int
  2. virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
  3. {
  4.     struct virtio_hw *hw = eth_dev->data->dev_private;
  5.     struct virtio_net_config *config;
  6.     struct virtio_net_config local_config;
  7.     struct rte_pci_device *pci_dev = NULL;
  8.     int ret;

  9.     /* Reset the device although not necessary at startup */
  10.     /* 调用ops的set_status(对于modern设备来说是modern_set_status),设置状态为VIRTIO_CONFIG_STATUS_RESET */
  11.     vtpci_reset(hw);

  12.     if (hw->vqs) { /* 如果支持初始化过队列则释放掉 */
  13.         virtio_dev_free_mbufs(eth_dev);
  14.         virtio_free_queues(hw);
  15.     }

  16.     /* Tell the host we've noticed this device. */
  17.     vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);

  18.     /* Tell the host we've known how to drive the device. */
  19.     vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
  20.     /* 完成前后端的feature协商,反馈设置后端feature */
  21.     if (virtio_negotiate_features(hw, req_features) < 0)
  22.         return -1;

  23.     if (!hw->virtio_user_dev) {
  24.         pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
  25.         rte_eth_copy_pci_info(eth_dev, pci_dev);
  26.     }

  27.     /* If host does not support both status and MSI-X then disable LSC */
  28.     if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS) &&
  29.      hw->use_msix != VIRTIO_MSIX_NONE)
  30.         eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
  31.     else
  32.         eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;

  33.     /* Setting up rx_header size for the device */
  34.     if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
  35.      vtpci_with_feature(hw, VIRTIO_F_VERSION_1))
  36.         hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
  37.     else
  38.         hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);

  39.     /* Copy the permanent MAC address to: virtio_hw */
  40.     virtio_get_hwaddr(hw);
  41.     ether_addr_copy((struct ether_addr *) hw->mac_addr,
  42.             &eth_dev->data->mac_addrs[0]);
  43.     PMD_INIT_LOG(DEBUG,
  44.          "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
  45.          hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
  46.          hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
  47.     /* 通过协商后端feature,初始化设备的其他字段,如mtu,MQ的支持 */
  48.     if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
  49.         config = &local_config;

  50.         vtpci_read_dev_config(hw,
  51.             offsetof(struct virtio_net_config, mac),
  52.             &config->mac, sizeof(config->mac));

  53.         if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
  54.             vtpci_read_dev_config(hw,
  55.                 offsetof(struct virtio_net_config, status),
  56.                 &config->status, sizeof(config->status));
  57.         } else {
  58.             PMD_INIT_LOG(DEBUG,
  59.                  "VIRTIO_NET_F_STATUS is not supported");
  60.             config->status = 0;
  61.         }

  62.         if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
  63.             vtpci_read_dev_config(hw,
  64.                 offsetof(struct virtio_net_config, max_virtqueue_pairs),
  65.                 &config->max_virtqueue_pairs,
  66.                 sizeof(config->max_virtqueue_pairs));
  67.         } else {
  68.             PMD_INIT_LOG(DEBUG,
  69.                  "VIRTIO_NET_F_MQ is not supported");
  70.             config->max_virtqueue_pairs = 1;
  71.         }

  72.         hw->max_queue_pairs = config->max_virtqueue_pairs;

  73.         if (vtpci_with_feature(hw, VIRTIO_NET_F_MTU)) {
  74.             vtpci_read_dev_config(hw,
  75.                 offsetof(struct virtio_net_config, mtu),
  76.                 &config->mtu,
  77.                 sizeof(config->mtu));

  78.             /*
  79.              * MTU value has already been checked at negotiation
  80.              * time, but check again in case it has changed since
  81.              * then, which should not happen.
  82.              */
  83.             if (config->mtu < ETHER_MIN_MTU) {
  84.                 PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
  85.                         config->mtu);
  86.                 return -1;
  87.             }

  88.             hw->max_mtu = config->mtu;
  89.             /* Set initial MTU to maximum one supported by vhost */
  90.             eth_dev->data->mtu = config->mtu;

  91.         } else {
  92.             hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN -
  93.                 VLAN_TAG_LEN - hw->vtnet_hdr_size;
  94.         }

  95.         PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
  96.                 config->max_virtqueue_pairs);
  97.         PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
  98.         PMD_INIT_LOG(DEBUG,
  99.                 "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
  100.                 config->mac[0], config->mac[1],
  101.                 config->mac[2], config->mac[3],
  102.                 config->mac[4], config->mac[5]);
  103.     } else {
  104.         PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
  105.         hw->max_queue_pairs = 1;
  106.         hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN -
  107.             VLAN_TAG_LEN - hw->vtnet_hdr_size;
  108.     }
  109.     /* 分配初始化设备的vq队列 */
  110.     ret = virtio_alloc_queues(eth_dev);
  111.     if (ret < 0)
  112.         return ret;

  113.     if (eth_dev->data->dev_conf.intr_conf.rxq) {
  114.         if (virtio_configure_intr(eth_dev) < 0) {
  115.             PMD_INIT_LOG(ERR, "failed to configure interrupt");
  116.             return -1;
  117.         }
  118.     }

  119.     /* 通过写寄存器VIRTIO_CONFIG_STATUS_DRIVER_OK,告诉后端前端初始化完成 */
  120.     vtpci_reinit_complete(hw);

  121.     if (pci_dev)
  122.         PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
  123.             eth_dev->data->port_id, pci_dev->id.vendor_id,
  124.             pci_dev->id.device_id);

  125.     return 0;
  126. }

最后用一张图总结一下dpdk前端net-virtio的初始化流程。


阅读(12485) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~