dpdk net_virtio前端驱动实现分析
——lvyilong316
和kernel中的virtio-net前端驱动相对应,net_virtio是dpdk中实现的virtio网络前端驱动。相对于kernel,dpdk的net_virtio实现要简单很多,而且条理更加清晰。所以当我们想了解前端是如何工作的时候,分析dpdk net_virtio网络前端是一个有效的方法。下面以dpdk 18.11为例分析一下net_virtio前端的大体实现流程。
和传统驱动一样,首先要从probe函数开始,net_virtio的probe函数为eth_virtio_pci_probe,通过以下结构体在INIT时注册:
-
static struct rte_pci_driver rte_virtio_pmd = {
-
.driver = {
-
.name = "net_virtio",
-
},
-
.id_table = pci_id_virtio_map,
-
.drv_flags = 0,
-
.probe = eth_virtio_pci_probe,
-
.remove = eth_virtio_pci_remove,
-
};
-
-
RTE_INIT(rte_virtio_pmd_init)
-
{
-
rte_eal_iopl_init();
-
rte_pci_register(&rte_virtio_pmd);
-
}
在分析eth_virtio_pci_probe函数前,我们先看pci_id_virtio_map:
-
/* VirtIO PCI vendor/device ID. */
-
#define VIRTIO_PCI_VENDORID 0x1AF4
-
#define VIRTIO_PCI_LEGACY_DEVICEID_NET 0x1000
-
#define VIRTIO_PCI_MODERN_DEVICEID_NET 0x1041
-
-
/*
-
* The set of PCI devices this driver supports
-
*/
-
static const struct rte_pci_id pci_id_virtio_map[] = {
-
{ RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_LEGACY_DEVICEID_NET) },
-
{ RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_MODERN_DEVICEID_NET) },
-
{ .vendor_id = 0, /* sentinel */ },
-
};
可以看到前端驱动支持两种类型的virtio网络设备:一种是legacy的,一种是modern的。因此在之后的处理逻辑中,我们会看到很多地方需要对这两种设备区别处理。
● eth_virtio_pci_probe
-
/*
 * PCI probe callback of the virtio PMD.
 *
 * Returns 1 when the IOPL call fails or when the device's devargs select
 * vDPA mode; otherwise returns the result of
 * rte_eth_dev_pci_generic_probe(), which is given sizeof(struct virtio_hw)
 * as the private data size and eth_virtio_dev_init as the init callback.
 */
static int eth_virtio_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
	struct rte_pci_device *pci_dev)
{
	/*
	 * iopl() grants the calling process access to the I/O ports; it is
	 * only needed on i386 platforms.
	 */
	const int iopl_rc = rte_eal_iopl_init();

	if (iopl_rc != 0) {
		PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD");
		return 1;
	}

	/* The virtio PMD skips the probe if the device must work in vDPA mode. */
	if (vdpa_mode_selected(pci_dev->device.devargs)) {
		return 1;
	}

	return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct virtio_hw),
		eth_virtio_dev_init);
}
● rte_eth_dev_pci_generic_probe
-
static inline int
-
rte_eth_dev_pci_generic_probe(struct rte_pci_device *pci_dev,
-
size_t private_data_size, eth_dev_pci_callback_t dev_init)
-
{
-
struct rte_eth_dev *eth_dev;
-
int ret;
-
/* private_data_size = sizeof(struct virtio_hw) */
-
/* 从全局rte_eth_devices[]数组中分配struct rte_eth_dev结构,确定port_id,
-
* 分配私有数据eth_dev->data->dev_private指向struct virtio_hw */
-
eth_dev = rte_eth_dev_pci_allocate(pci_dev, private_data_size);
-
if (!eth_dev)
-
return -ENOMEM;
-
-
RTE_FUNC_PTR_OR_ERR_RET(*dev_init, -EINVAL);
-
ret = dev_init(eth_dev); /* eth_virtio_dev_init */
-
if (ret)
-
rte_eth_dev_pci_release(eth_dev);
-
else
-
rte_eth_dev_probing_finish(eth_dev);
-
-
return ret;
-
}
其中首先调用rte_eth_dev_pci_allocate,该函数从全局rte_eth_devices[]数组中分配struct rte_eth_dev结构,确定port_id,同时分配私有数据eth_dev->data->dev_private指向struct virtio_hw;然后调用dev_init,也就是作为参数传入的eth_virtio_dev_init函数,我们后面重点分析;最后调用rte_eth_dev_probing_finish,触发RTE_ETH_EVENT_NEW事件,并将设备状态设置为RTE_ETH_DEV_ATTACHED。相关数据结构关系如下图:
● rte_eth_dev_probing_finish
-
void
-
rte_eth_dev_probing_finish(struct rte_eth_dev *dev)
-
{
-
if (dev == NULL)
-
return;
-
-
_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_NEW, NULL);
-
-
dev->state = RTE_ETH_DEV_ATTACHED;
-
}
下面重点看下net-virtio设备的核心初始化函数eth_virtio_dev_init。
● eth_virtio_dev_init
-
int
-
eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
-
{
-
struct virtio_hw *hw = eth_dev->data->dev_private;
-
int ret;
-
-
RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf));
-
-
eth_dev->dev_ops = &virtio_eth_dev_ops; /* 初始化virtio的设备处理函数 */
-
-
if (rte_eal_process_type() == RTE_PROC_SECONDARY) { /* 如果进程是SECONDARY的处理逻辑 */
-
/....../
-
return 0;
-
}
-
-
/* Allocate memory for storing MAC addresses */
-
eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0);
-
if (eth_dev->data->mac_addrs == NULL) {
-
return -ENOMEM;
-
}
-
-
hw->port_id = eth_dev->data->port_id;
-
/* For virtio_user case the hw->virtio_user_dev is populated by
-
* virtio_user_eth_dev_alloc() before eth_virtio_dev_init() is called.
-
*/
-
/* 如果不是virtio_user设备 */
-
if (!hw->virtio_user_dev) {
-
ret = vtpci_init(RTE_ETH_DEV_TO_PCI(eth_dev), hw);/* 设备的pci信息初始化*/
-
if (ret)
-
goto out;
-
}
-
-
/* reset device and negotiate default features */
-
/* 完成前后端协商,确定feature的支持 */
-
ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
-
if (ret < 0)
-
goto out;
-
-
return 0;
-
-
out:
-
rte_free(eth_dev->data->mac_addrs);
-
return ret;
-
}
其中关键是调用了两个函数:一个是vtpci_init,该函数读取设备的PCI配置空间进行初始化,同时判断设备是modern还是legacy;另一个是virtio_init_device,该函数主要完成前后端协商,进一步初始化设备,如确定设备支持的feature等。下面首先分析vtpci_init。
● vtpci_init
-
int
-
vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
-
{
-
/*
-
* Try if we can succeed reading virtio pci caps, which exists
-
* only on modern pci device. If failed, we fallback to legacy
-
* virtio handling.
-
*/
-
if (virtio_read_caps(dev, hw) == 0) {
-
PMD_INIT_LOG(INFO, "modern virtio pci detected.");
-
virtio_hw_internal[hw->port_id].vtpci_ops = &modern_ops;
-
hw->modern = 1;
-
return 0;
-
}
-
-
PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
-
/* 对于lagacy需要进行ioport mmap */
-
if (rte_pci_ioport_map(dev, 0, VTPCI_IO(hw)) < 0) {
-
return -1;
-
}
-
-
virtio_hw_internal[hw->port_id].vtpci_ops = &legacy_ops;
-
hw->modern = 0;
-
-
return 0;
-
}
其中主要通过virtio_read_caps读取PCI配置空间,初始化设备,然后根据PCI支持的能力确定设备是modern还是legacy,分别初始化对应的ops。我们看下virtio_read_caps中是根据什么判断设备是modern的。
● virtio_read_caps
-
static int
-
virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
-
{
-
uint8_t pos;
-
struct virtio_pci_cap cap;
-
int ret;
-
/* 通过UIO或VFIO进行mmap */
-
if (rte_pci_map_device(dev)) {
-
PMD_INIT_LOG(DEBUG, "failed to map pci device!");
-
return -1;
-
}
-
/* 读取PCI设备capability list */
-
ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
-
if (ret != 1) {
-
PMD_INIT_LOG(DEBUG,
-
"failed to read pci capability list, ret %d", ret);
-
return -1;
-
}
-
/* 读取PCI设备配置空间capability,初始化hw中的字段,如是否支持MSIX等 */
-
while (pos) {
-
ret = rte_pci_read_config(dev, &cap, 2, pos);
-
if (ret != 2) {
-
PMD_INIT_LOG(DEBUG,
-
"failed to read pci cap at pos: %x ret %d",
-
pos, ret);
-
break;
-
}
-
-
if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
-
/* Transitional devices would also have this capability,
-
* that's why we also check if msix is enabled.
-
* 1st byte is cap ID; 2nd byte is the position of next
-
* cap; next two bytes are the flags.
-
*/
-
uint16_t flags;
-
-
ret = rte_pci_read_config(dev, &flags, sizeof(flags),
-
pos + 2);
-
if (ret != sizeof(flags)) {
-
PMD_INIT_LOG(DEBUG,
-
"failed to read pci cap at pos:"
-
" %x ret %d", pos + 2, ret);
-
break;
-
}
-
-
if (flags & PCI_MSIX_ENABLE)
-
hw->use_msix = VIRTIO_MSIX_ENABLED;
-
else
-
hw->use_msix = VIRTIO_MSIX_DISABLED;
-
}
-
-
if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
-
PMD_INIT_LOG(DEBUG,
-
"[%2x] skipping non VNDR cap id: %02x",
-
pos, cap.cap_vndr);
-
goto next;
-
}
-
-
ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
-
if (ret != sizeof(cap)) {
-
PMD_INIT_LOG(DEBUG,
-
"failed to read pci cap at pos: %x ret %d",
-
pos, ret);
-
break;
-
}
-
-
PMD_INIT_LOG(DEBUG,
-
"[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
-
pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
-
-
switch (cap.cfg_type) {
-
case VIRTIO_PCI_CAP_COMMON_CFG:
-
hw->common_cfg = get_cfg_addr(dev, &cap);
-
break;
-
case VIRTIO_PCI_CAP_NOTIFY_CFG:
-
ret = rte_pci_read_config(dev,
-
&hw->notify_off_multiplier,
-
4, pos + sizeof(cap));
-
if (ret != 4)
-
PMD_INIT_LOG(DEBUG,
-
"failed to read notify_off_multiplier, ret %d",
-
ret);
-
else
-
hw->notify_base = get_cfg_addr(dev, &cap);
-
break;
-
case VIRTIO_PCI_CAP_DEVICE_CFG:
-
hw->dev_cfg = get_cfg_addr(dev, &cap);
-
break;
-
case VIRTIO_PCI_CAP_ISR_CFG:
-
hw->isr = get_cfg_addr(dev, &cap);
-
break;
-
}
-
-
next:
-
pos = cap.cap_next;
-
}
-
/* 判断设备是否是modern的,注意只有这些配置都支持才能是modern */
-
if (hw->common_cfg == NULL || hw->notify_base == NULL ||
-
hw->dev_cfg == NULL || hw->isr == NULL) {
-
PMD_INIT_LOG(INFO, "no modern virtio pci device found.");
-
return -1;
-
}
-
-
PMD_INIT_LOG(INFO, "found modern virtio pci device.");
-
-
PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", hw->common_cfg);
-
PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", hw->dev_cfg);
-
PMD_INIT_LOG(DEBUG, "isr cfg mapped at: %p", hw->isr);
-
PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u",
-
hw->notify_base, hw->notify_off_multiplier);
-
-
return 0;
-
}
可以看到modern设备需要同时满足四个条件:hw->common_cfg != NULL,hw->notify_base != NULL,hw->dev_cfg != NULL,hw->isr != NULL。只要其中任何一个为NULL,virtio_read_caps就返回-1,设备按legacy方式处理。
下面看前后端协商的初始化函数virtio_init_device。注意其第二个调用参数为VIRTIO_PMD_DEFAULT_GUEST_FEATURES ,这个是代码中定义的前端支持的全量feature。
ret = virtio_init_device(eth_dev,
VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
● virtio_init_device
-
static int
-
virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
-
{
-
struct virtio_hw *hw = eth_dev->data->dev_private;
-
struct virtio_net_config *config;
-
struct virtio_net_config local_config;
-
struct rte_pci_device *pci_dev = NULL;
-
int ret;
-
-
/* Reset the device although not necessary at startup */
-
/* 调用ops的set_status(对于modern设备来说是modern_set_status),设置状态为VIRTIO_CONFIG_STATUS_RESET */
-
vtpci_reset(hw);
-
-
if (hw->vqs) { /* 如果支持初始化过队列则释放掉 */
-
virtio_dev_free_mbufs(eth_dev);
-
virtio_free_queues(hw);
-
}
-
-
/* Tell the host we've noticed this device. */
-
vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
-
-
/* Tell the host we've known how to drive the device. */
-
vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
-
/* 完成前后端的feature协商,反馈设置后端feature */
-
if (virtio_negotiate_features(hw, req_features) < 0)
-
return -1;
-
-
if (!hw->virtio_user_dev) {
-
pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
-
rte_eth_copy_pci_info(eth_dev, pci_dev);
-
}
-
-
/* If host does not support both status and MSI-X then disable LSC */
-
if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS) &&
-
hw->use_msix != VIRTIO_MSIX_NONE)
-
eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
-
else
-
eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
-
-
/* Setting up rx_header size for the device */
-
if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
-
vtpci_with_feature(hw, VIRTIO_F_VERSION_1))
-
hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-
else
-
hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
-
-
/* Copy the permanent MAC address to: virtio_hw */
-
virtio_get_hwaddr(hw);
-
ether_addr_copy((struct ether_addr *) hw->mac_addr,
-
ð_dev->data->mac_addrs[0]);
-
PMD_INIT_LOG(DEBUG,
-
"PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
-
hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
-
hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
-
/* 通过协商后端feature,初始化设备的其他字段,如mtu,MQ的支持 */
-
if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
-
config = &local_config;
-
-
vtpci_read_dev_config(hw,
-
offsetof(struct virtio_net_config, mac),
-
&config->mac, sizeof(config->mac));
-
-
if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
-
vtpci_read_dev_config(hw,
-
offsetof(struct virtio_net_config, status),
-
&config->status, sizeof(config->status));
-
} else {
-
PMD_INIT_LOG(DEBUG,
-
"VIRTIO_NET_F_STATUS is not supported");
-
config->status = 0;
-
}
-
-
if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
-
vtpci_read_dev_config(hw,
-
offsetof(struct virtio_net_config, max_virtqueue_pairs),
-
&config->max_virtqueue_pairs,
-
sizeof(config->max_virtqueue_pairs));
-
} else {
-
PMD_INIT_LOG(DEBUG,
-
"VIRTIO_NET_F_MQ is not supported");
-
config->max_virtqueue_pairs = 1;
-
}
-
-
hw->max_queue_pairs = config->max_virtqueue_pairs;
-
-
if (vtpci_with_feature(hw, VIRTIO_NET_F_MTU)) {
-
vtpci_read_dev_config(hw,
-
offsetof(struct virtio_net_config, mtu),
-
&config->mtu,
-
sizeof(config->mtu));
-
-
/*
-
* MTU value has already been checked at negotiation
-
* time, but check again in case it has changed since
-
* then, which should not happen.
-
*/
-
if (config->mtu < ETHER_MIN_MTU) {
-
PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
-
config->mtu);
-
return -1;
-
}
-
-
hw->max_mtu = config->mtu;
-
/* Set initial MTU to maximum one supported by vhost */
-
eth_dev->data->mtu = config->mtu;
-
-
} else {
-
hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN -
-
VLAN_TAG_LEN - hw->vtnet_hdr_size;
-
}
-
-
PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
-
config->max_virtqueue_pairs);
-
PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
-
PMD_INIT_LOG(DEBUG,
-
"PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
-
config->mac[0], config->mac[1],
-
config->mac[2], config->mac[3],
-
config->mac[4], config->mac[5]);
-
} else {
-
PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
-
hw->max_queue_pairs = 1;
-
hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN -
-
VLAN_TAG_LEN - hw->vtnet_hdr_size;
-
}
-
/* 分配初始化设备的vq队列 */
-
ret = virtio_alloc_queues(eth_dev);
-
if (ret < 0)
-
return ret;
-
-
if (eth_dev->data->dev_conf.intr_conf.rxq) {
-
if (virtio_configure_intr(eth_dev) < 0) {
-
PMD_INIT_LOG(ERR, "failed to configure interrupt");
-
return -1;
-
}
-
}
-
-
/* 通过写寄存器VIRTIO_CONFIG_STATUS_DRIVER_OK,告诉后端前端初始化完成 */
-
vtpci_reinit_complete(hw);
-
-
if (pci_dev)
-
PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
-
eth_dev->data->port_id, pci_dev->id.vendor_id,
-
pci_dev->id.device_id);
-
-
return 0;
-
}
最后用一张图总结一下dpdk前端net-virtio的初始化流程。
阅读(12729) | 评论(0) | 转发(0) |