dpdk pci设备初始化
(代码来自dpdk16.11)
——lvyilong316
dpdk pci设备的初始化主要由rte_eal_pci_init函数完成,它是在rte_eal_init中被调用的,调用关系为:rte_eal_init → rte_eal_pci_init。
● rte_eal_pci_init
-
int
-
rte_eal_pci_init(void)
-
{
-
/* for debug purposes, PCI can be disabled */
-
if (internal_config.no_pci)
-
return 0;
-
-
if (rte_eal_pci_scan() < 0) {
-
RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__);
-
return -1;
-
-
}
-
return 0;
-
}
如果配置了no_pci则直接返回,否则调用rte_eal_pci_scan。
● rte_eal_pci_scan
这个函数主要是遍历系统的/sys/bus/pci/devices目录中的每个子目录,/sys/bus/pci/devices目录中每个子目录都对应一个pci设备,如下图所示。
-
int
-
rte_eal_pci_scan(void)
-
{
-
struct dirent *e;
-
DIR *dir;
-
char dirname[PATH_MAX];
-
struct rte_pci_addr addr;
-
/*打开/sys/bus/pci/devices 目录*/
-
dir = opendir(pci_get_sysfs_path());
-
if (dir == NULL) {
-
RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
-
__func__, strerror(errno));
-
return -1;
-
}
-
-
while ((e = readdir(dir)) != NULL) {
-
if (e->d_name[0] == '.')
-
continue;
-
/*根据pci设备标识(地址)如:0000:01:11.4初始化addr中的domain,bus,devid,function*/
-
if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0)
-
continue;
-
-
snprintf(dirname, sizeof(dirname), "%s/%s",
-
pci_get_sysfs_path(), e->d_name);
-
/*读取每个pci设备的目录,创建并初始化rte_pci_device结构,加入全局链表pci_device_list*/
-
if (pci_scan_one(dirname, &addr) < 0)
-
goto error;
-
}
-
closedir(dir);
-
return 0;
-
-
error:
-
closedir(dir);
-
return -1;
-
}
pci_scan_one函数又会去读取每个子目录,每个子目录中包含这个pci设备的属性信息。如下图所示。
根据这些属性信息初始化一个pci设备,这个pci设备用rte_pci_device结构表示。
● pci地址和pci id
这里说明一点,就是pci地址和pci id的区别,在/sys/bus/pci/devices目录下中看到的数字是pci地址,在dpdk中使用rte_pci_addr结构表示。
-
/**
 * Location of a PCI device, as parsed from a sysfs entry name of the
 * form "domain:bus:devid.function" (e.g. "0000:01:11.4").
 */
struct rte_pci_addr {
	uint16_t domain;   /**< Device domain */
	uint8_t bus;       /**< Device bus */
	uint8_t devid;     /**< Device ID */
	uint8_t function;  /**< Device function. */
};
而pci id由class_id ,vendor_id ,device_id 等组成。dpdk中用rte_pci_id结构表示。
-
/**
 * Identity of a PCI device: class plus vendor/device and subsystem IDs.
 * This is distinct from the device's bus address (struct rte_pci_addr).
 */
struct rte_pci_id {
	uint32_t class_id;             /**< Class ID (class, subclass, pi) or RTE_CLASS_ANY_ID. */
	uint16_t vendor_id;            /**< Vendor ID or PCI_ANY_ID. */
	uint16_t device_id;            /**< Device ID or PCI_ANY_ID. */
	uint16_t subsystem_vendor_id;  /**< Subsystem vendor ID or PCI_ANY_ID. */
	uint16_t subsystem_device_id;  /**< Subsystem device ID or PCI_ANY_ID. */
};
● pci_scan_one
-
static int pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
-
{
-
char filename[PATH_MAX];
-
unsigned long tmp;
-
struct rte_pci_device *dev;
-
char driver[PATH_MAX];
-
int ret;
-
-
dev = malloc(sizeof(*dev));
-
if (dev == NULL)
-
return -1;
-
-
memset(dev, 0, sizeof(*dev));
-
dev->addr = *addr;
-
/*根据设备属性,初始化设备的pci id*/
-
/* get vendor id */
-
snprintf(filename, sizeof(filename), "%s/vendor", dirname);
-
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-
free(dev);
-
return -1;
-
}
-
dev->id.vendor_id = (uint16_t)tmp;
-
-
/* get device id */
-
snprintf(filename, sizeof(filename), "%s/device", dirname);
-
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-
free(dev);
-
return -1;
-
}
-
dev->id.device_id = (uint16_t)tmp;
-
-
/* get subsystem_vendor id */
-
snprintf(filename, sizeof(filename), "%s/subsystem_vendor",
-
dirname);
-
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-
free(dev);
-
return -1;
-
}
-
dev->id.subsystem_vendor_id = (uint16_t)tmp;
-
-
/* get subsystem_device id */
-
snprintf(filename, sizeof(filename), "%s/subsystem_device",
-
dirname);
-
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-
free(dev);
-
return -1;
-
}
-
dev->id.subsystem_device_id = (uint16_t)tmp;
-
-
/* get class_id */
-
snprintf(filename, sizeof(filename), "%s/class",
-
dirname);
-
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-
free(dev);
-
return -1;
-
}
-
/* the least 24 bits are valid: class, subclass, program interface */
-
dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID;
-
-
/* get max_vfs */
-
/*获取设备的vf个数*/
-
dev->max_vfs = 0;
-
snprintf(filename, sizeof(filename), "%s/max_vfs", dirname);
-
if (!access(filename, F_OK) &&
-
eal_parse_sysfs_value(filename, &tmp) == 0)
-
dev->max_vfs = (uint16_t)tmp;
-
else {
-
/* for non igb_uio driver, need kernel version >= 3.8 */
-
snprintf(filename, sizeof(filename),
-
"%s/sriov_numvfs", dirname);
-
if (!access(filename, F_OK) &&
-
eal_parse_sysfs_value(filename, &tmp) == 0)
-
dev->max_vfs = (uint16_t)tmp;
-
}
-
-
/* get numa node */
-
/*如果设备开启numa,则在设备属性中会有numa_node的属性*/
-
snprintf(filename, sizeof(filename), "%s/numa_node",
-
dirname);
-
if (access(filename, R_OK) != 0) {
-
/* if no NUMA support, set default to 0 */
-
dev->device.numa_node = 0;
-
} else {
-
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
-
free(dev);
-
return -1;
-
}
-
dev->device.numa_node = tmp;
-
}
-
-
/* parse resources */
-
/*获取pci设备映射的地址空间*/
-
snprintf(filename, sizeof(filename), "%s/resource", dirname);
-
if (pci_parse_sysfs_resource(filename, dev) < 0) {
-
RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__);
-
free(dev);
-
return -1;
-
}
-
-
/* parse driver */
-
/*获取pci设备关联的驱动,如/sys/bus/pci/devices/0000\:01\:10.2/driver*/
-
snprintf(filename, sizeof(filename), "%s/driver", dirname);
-
/*使用readlink获取驱动的名称*/
-
ret = pci_get_kernel_driver_by_path(filename, driver);
-
if (ret < 0) {
-
RTE_LOG(ERR, EAL, "Fail to get kernel driver\n");
-
free(dev);
-
return -1;
-
}
-
-
if (!ret) { /*如果设备有关联驱动*/
-
if (!strcmp(driver, "vfio-pci"))
-
dev->kdrv = RTE_KDRV_VFIO; /*目前dpdk只支持三种驱动:vfio-pci,igb_uio,uio_pci_generic*/
-
else if (!strcmp(driver, "igb_uio"))
-
dev->kdrv = RTE_KDRV_IGB_UIO;
-
else if (!strcmp(driver, "uio_pci_generic"))
-
dev->kdrv = RTE_KDRV_UIO_GENERIC;
-
else
-
dev->kdrv = RTE_KDRV_UNKNOWN;
-
} else
-
dev->kdrv = RTE_KDRV_NONE;
-
-
/* device is valid, add in list (sorted) */
-
if (TAILQ_EMPTY(&pci_device_list)) { /*如果pci_device_list为空,直接将当前设备插入链表*/
-
rte_eal_device_insert(&dev->device);
-
TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
-
} else {
-
struct rte_pci_device *dev2;
-
int ret;
-
/*将pci设备按照pci地址由大到小插入pci_device_list*/
-
TAILQ_FOREACH(dev2, &pci_device_list, next) {
-
ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr);
-
if (ret > 0)
-
continue;
-
-
if (ret < 0) {
-
TAILQ_INSERT_BEFORE(dev2, dev, next);
-
rte_eal_device_insert(&dev->device);
-
} else { /* already registered */
-
dev2->kdrv = dev->kdrv;
-
dev2->max_vfs = dev->max_vfs;
-
memmove(dev2->mem_resource, dev->mem_resource,
-
sizeof(dev->mem_resource));
-
free(dev);
-
}
-
return 0;
-
}
-
rte_eal_device_insert(&dev->device);
-
TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
-
}
-
-
return 0;
-
}
其中一步需要提一下,就是打开pci设备目录下的resource文件,获取pci设备的地址空间(也就是pci的BAR),并将其保存在dev->mem_resource中。这个pci地址在后面pci资源映射中会用到。
通过readlink pci设备目录下的driver属性获取pci设备的驱动,目前dpdk只支持三种pci驱动:vfio-pci,igb_uio,uio_pci_generic。
最后将创建初始化好的rte_pci_device结构按照pci地址由小到大插入全局链表pci_device_list中(新设备会被插入到第一个地址比它大的节点之前)。相关数据结构关系如下图所示。
阅读(23046) | 评论(0) | 转发(1) |