我们以e1000g网卡驱动为例,驱动程序由pci_config_setup(9F)或者ddi_regs_map_setup(9F)得到访问配置地址空间的句柄。下面我们通过mdb来看看这个句柄是个怎样的数据结构。
# mdb -k
> ::prtconf ! grep e1000g
300004271b0 pci8086,1001, instance #0 (driver name: e1000g)
> 300004271b0::print -t struct dev_info devi_driver_data |::print -t e1000g_t osdep.cfg_handle |::print -t struct ddi_acc_hdl ah_addr
caddr_t ah_addr = 0x29ebf9e3800
> 0x29ebf9e3800::sfmmu_vtop
virtual 29ebf9e3800 mapped to physical 7fe00001800
在x86平台上则使用::vatopfn来完成虚拟地址到物理页帧号(pfn)的转换,例如:
> 0xffffff00bf0fc000::vatopfn
level=0 htable=ffffff01031803d8 pte=80000000f00c8573
level=1 htable=ffffff0103180a98 pte=583c5027
level=2 htable=ffffff0103157df0 pte=5a66d027
level=3 htable=ffffff01031572b0 pte=5afff027
Virtual address 0xffffff00bf0fc000 maps pfn 0xf00c8
> 300004271b0::print -t struct dev_info devi_driver_data |::print -t e1000g_t osdep.cfg_handle |::print -t struct ddi_acc_hdl
{
int ah_vers = 0x1
void *ah_bus_private = 0
void *ah_platform_private = 0x60011a99cc0
dev_info_t *ah_dip = 0x300004271b0
uint_t ah_rnumber = 0
caddr_t ah_addr = 0x29ebf9e3800
off_t ah_offset = 0
off_t ah_len = 0
uint_t ah_hat_flags = 0x2000
pfn_t ah_pfn = 0x3ff00000
uint_t ah_pnum = 0x1
ulong_t ah_xfermodes = 0
ddi_device_acc_attr_t ah_acc = {
ushort_t devacc_attr_version = 0x2
uchar_t devacc_attr_endian_flags = 0x1
uchar_t devacc_attr_dataorder = 0
uchar_t devacc_attr_access = 0x2
}
}
驱动程序通过pci_config_setup(9F)或者ddi_regs_map_setup(9F)建立了PCI设备配置地址空间和内核虚拟地址之间的映射,在上例中(SPARC),配置地址空间的物理地址是0x7fe00001800,内核虚拟地址是0x29ebf9e3800。这个物理地址也可以通过pcitool得到验证:
# pcitool -v
...
Bus Number: 0 Device Number: 3 Function Number: 0
Physical Address: 0x7fe00001800
Vendor ID: 8086
Device ID: 1026
Command: 0157
Status: 0230
Revision ID: 04
Class Code: 020000
Cache Line Size: 10
Latency Timer: 40
Header Type: 00
BIST: 00
Base Address Register 0 (@10): 00100004
Base Address Register 1 (@14): 00000000
Base Address Register 2 (@18): 00140004
Base Address Register 3 (@1C): 00000000
Base Address Register 4 (@20): 00000941
Base Address Register 5 (@24): 00000000
Expansion ROM Base Address Register (@30): 00180000
...
BAR0的内核虚拟地址和物理地址也可以通过类似的方法得到:
# mdb -k
> ::prtconf ! grep e1000g
300004271b0 pci8086,1001, instance #0 (driver name: e1000g)
> 300004271b0::print -t struct dev_info devi_driver_data |::print -t e1000g_t osdep.reg_handle |::print -t struct ddi_acc_hdl ah_addr
caddr_t ah_addr = 0x29ebf9e4000
> 0x29ebf9e4000::sfmmu_vtop
virtual 29ebf9e4000 mapped to physical 7ff00100000
> 300004271b0::print -t struct dev_info devi_driver_data |::print -t e1000g_t osdep.reg_handle |::print -t struct ddi_acc_hdl
{
int ah_vers = 0x1
void *ah_bus_private = 0
void *ah_platform_private = 0x60011a99b80
dev_info_t *ah_dip = 0x300004271b0
uint_t ah_rnumber = 0x1
caddr_t ah_addr = 0x29ebf9e4000
off_t ah_offset = 0
off_t ah_len = 0x20000
uint_t ah_hat_flags = 0x2000
pfn_t ah_pfn = 0x3ff80080
uint_t ah_pnum = 0x10
ulong_t ah_xfermodes = 0
ddi_device_acc_attr_t ah_acc = {
ushort_t devacc_attr_version = 0x1
uchar_t devacc_attr_endian_flags = 0x1
uchar_t devacc_attr_dataorder = 0
uchar_t devacc_attr_access = 0x1
}
}
或者
> 300004271b0::print -t struct dev_info devi_driver_data |::print -t e1000g_t shared.hw_addr
u8 *shared.hw_addr = 0x29ebf9e4000
每个PCI function有256字节的配置空间,可以通过CF8/CFC端口读取其内容。PCI Express的配置空间则扩展到每个function 4K字节,因此整个配置空间最多可达256M(2^8个bus * 2^5个device * 2^3个function * 4K = 256M)。在x86平台上,这段配置空间的起始物理地址由ACPI的MCFG table提供;下面的C程序根据BDF(bus/device/function)计算出某个PCI设备的配置空间相对于该起始地址的偏移。
#include <stdio.h>

/*
 * Bit positions of the function/device/bus fields in a conventional PCI
 * configuration address (256 bytes of configuration space per function).
 */
#define PCI_REG_FUNC_SHIFT 8 /* Offset of function bits */
#define PCI_REG_DEV_SHIFT 11 /* Offset of device bits */
#define PCI_REG_BUS_SHIFT 16 /* Offset of bus bits */

/*
 * PCI Express gives every function 4K of configuration space instead of
 * 256 bytes, so each BDF field sits 4 bits higher than in the PCI layout.
 */
#define PCIEX_BDF_OFFSET_DELTA 4
#define PCIEX_REG_FUNC_SHIFT (PCI_REG_FUNC_SHIFT + PCIEX_BDF_OFFSET_DELTA)
#define PCIEX_REG_DEV_SHIFT (PCI_REG_DEV_SHIFT + PCIEX_BDF_OFFSET_DELTA)
#define PCIEX_REG_BUS_SHIFT (PCI_REG_BUS_SHIFT + PCIEX_BDF_OFFSET_DELTA)

/*
 * Compute the offset of a function's PCI Express configuration space
 * relative to the start of the memory-mapped configuration region.
 *
 * bus, dev and func are the bus, device and function numbers of the
 * target; the caller adds the returned offset to the region's base
 * physical address to obtain the absolute address.
 */
static unsigned int
pciex_cfg_offset(unsigned int bus, unsigned int dev, unsigned int func)
{
	return (bus << PCIEX_REG_BUS_SHIFT) |
	    (dev << PCIEX_REG_DEV_SHIFT) |
	    (func << PCIEX_REG_FUNC_SHIFT);
}

/*
 * Print, in hex, the configuration-space offset of the hard-coded
 * bus/device/function below. Edit the three values to look up another
 * device. Command-line arguments are ignored.
 */
int main(int argc, char *argv[])
{
	/* Unsigned so the shifts and the %x conversion are well defined. */
	unsigned int bus_no = 0, dev_no = 0, func_no = 1;
	unsigned int phys_addr;

	(void)argc;
	(void)argv;

	phys_addr = pciex_cfg_offset(bus_no, dev_no, func_no);
	printf("%x\n", phys_addr);
	return 0;
}
那么如何通过mdb来找到这个初始物理地址呢?
# mdb -k
> ::prtconf ! grep npe
ffffff0147cf7788 pciex_root_complex, instance #0 (driver name: npe)
> ffffff0147cf7788::print -t struct dev_info devi_hw_prop_ptr |::print -t ddi_prop_t
{
struct ddi_prop *prop_next = 0xffffff01484df260
dev_t prop_dev = 0xffffffffffffffff
char *prop_name = 0xffffff0148513c70 "available"
int prop_flags = 0x100
int prop_len = 0x438
caddr_t prop_val = 0xffffff01471c4b80
}
> 0xffffff01484df260::print -t struct ddi_prop
{
struct ddi_prop *prop_next = 0xffffff01484df288
dev_t prop_dev = 0xffffffffffffffff
char *prop_name = 0xffffff0148513c80 "bus-range"
int prop_flags = 0x100
int prop_len = 0x8
caddr_t prop_val = 0xffffff0148343be8
}
> 0xffffff01484df288::print -t struct ddi_prop
{
struct ddi_prop *prop_next = 0xffffff01484df5f8
dev_t prop_dev = 0xffffffffffffffff
char *prop_name = 0xffffff0148343bd8 "ranges"
int prop_flags = 0x100
int prop_len = 0x150
caddr_t prop_val = 0xffffff01484a9680
}
> 0xffffff01484df5f8::print -t struct ddi_prop
{
struct ddi_prop *prop_next = 0xffffff01484dfda0
dev_t prop_dev = 0xffffffffffffffff
char *prop_name = 0xffffff0148513eb0 "acpi-namespace"
int prop_flags = 0x200
int prop_len = 0xb
caddr_t prop_val = 0xffffff0148513e90
}
> 0xffffff01484dfda0::print -t struct ddi_prop
{
struct ddi_prop *prop_next = 0xffffff0146e827d8
dev_t prop_dev = 0xffffffffffffffff
char *prop_name = 0xffffff0148343a80 "ecfg"
int prop_flags = 0x1000
int prop_len = 0x20
caddr_t prop_val = 0xffffff01484dde70
}
> 0xffffff01484dde70/K
0xffffff01484dde70: f4000000
或者更简单一点
> ffffff0147cf7788::devinfo
...
name='ecfg' type=int64 items=4
value=00000000f4000000.0000000000000000.0000000000000000.000000000000003f
...
# pcitool -v | grep Physical
Physical Address: 0xf4000000
Physical Address: 0xf4008000
Physical Address: 0xf40d0000
Physical Address: 0xf40d1000
Physical Address: 0xf40d7000
Physical Address: 0xf40d8000
Physical Address: 0xf40e0000
Physical Address: 0xf40e1000
Physical Address: 0xf40e3000
Physical Address: 0xf40e5000
Physical Address: 0xf40e8000
Physical Address: 0xf40e9000
Physical Address: 0xf40ea000
Physical Address: 0xf40ef000
Physical Address: 0xf40f0000
Physical Address: 0xf40f8000
Physical Address: 0xf40f9000
Physical Address: 0xf40fa000
Physical Address: 0xf40fb000
Physical Address: 0xf4100000
Physical Address: 0xf4308000
Physical Address: 0xf4309000
Physical Address: 0xf430a000
Physical Address: 0xf430b000
Physical Address: 0xf430c000
Physical Address: 0xf4900000
Physical Address: 0xf4c00000
通过mdb我们找到了配置空间的初始物理地址0xf4000000,根据PCI设备的BDF,结合上述C程序算出的偏移,就可以验证由pcitool得到的配置空间物理地址了。例如bus 0、device 1、function 0的偏移是1 << 15 = 0x8000,加上0xf4000000正好是pcitool输出中的0xf4008000。至于为什么该地址保存在属性"ecfg"中,可以参考函数npe_query_acpi_mcfg。
Solaris在SPARC平台下没有scanpci和lspci等相应的命令,prtconf是一个可以得到系统配置信息的命令,当然也包括PCI设备。
# prtconf -vp
...
Node 0xf00d1b0c
assigned-addresses: 83001810.00000000.00100000.00000000.00020000.83001818.00000000.00140000.00000000.00040000.81001820.00000000.00000940.00000000.00000040.82001830.00000000.00180000.00000000.00040000
reg: 00001800.00000000.00000000.00000000.00000000.03001810.00000000.00000000.00000000.00020000.03001818.00000000.00000000.00000000.00040000.01001820.00000000.00000000.00000000.00000040.02001830.00000000.00000000.00000000.00040000
compatible: 'pci8086,1026.8086.1001.4' + 'pci8086,1026.8086.1001' + 'pci8086,1001' + 'pci8086,1026.4' + 'pci8086,1026' + 'pciclass,020000' + 'pciclass,0200'
name: 'ethernet'
66mhz-capable:
devsel-speed: 00000001
class-code: 00020000
interrupts: 00000001
latency-timer: 00000040
cache-line-size: 00000010
max-latency: 00000000
min-grant: 000000ff
subsystem-id: 00001001
subsystem-vendor-id: 00008086
revision-id: 00000004
device-id: 00001026
vendor-id: 00008086
...
reg属性以5个32位字为一组,每组第一个字中各个位域的含义可以从pci(4) man page中查到,也可以用下面的C程序把这些位域解析出来(程序中的regs[]取的就是上面reg属性里每组的第一个字)。
#include <stdio.h>

/*
 * Layout of the first 32-bit word of each "reg" property entry:
 *
 * Bits 0 - 7    8-bit register number
 * Bits 8 - 10   3-bit function number
 * Bits 11 - 15  5-bit device number
 * Bits 16 - 23  8-bit bus number
 * Bits 24 - 25  2-bit address space type identifier
 * Bits 28 - 31  Register number extended bits 8:11
 *               for extended config space. Zero for
 *               conventional configuration space.
 *
 * The address space type identifier can be
 * interpreted as follows:
 *   0x0 configuration space
 *   0x1 I/O space
 *   0x2 32-bit memory space address
 *   0x3 64-bit memory space address
 */
#define REG 0x000000ffu
#define FUN 0x00000700u
#define DEV 0x0000f800u
#define BUS 0x00ff0000u
#define ADD 0x03000000u
#define EXT 0xf0000000u

/*
 * Decode one register word and print its fields on a single line,
 * each field in hex and separated by tabs.
 */
static void
print_reg_word(unsigned int word)
{
	printf("REG: %x\t", word & REG);
	printf("FUN: %x\t", (word & FUN) >> 8);
	printf("DEV: %x\t", (word & DEV) >> 11);
	printf("BUS: %x\t", (word & BUS) >> 16);
	printf("ADD: %x\t", (word & ADD) >> 24);
	printf("EXT: %x\n", (word & EXT) >> 28);
}

/*
 * Decode every word in the built-in regs[] table. A zero entry is not
 * decoded; it prints an empty line so groups of words can be separated
 * in the output. Command-line arguments are ignored.
 */
int main(int argc, char *argv[])
{
	/*
	 * Unsigned storage: several of the alternative sample words have
	 * bit 31 set and do not fit in a signed int.
	 */
	static const unsigned int regs[] = {
		/*
		 * Alternative sample words; move them out of this comment
		 * to decode them (0 entries act as group separators):
		 *
		 * 0x0200c810,
		 * 0x8200c814,
		 * 0x8100c818,
		 * 0x0000c800,
		 * 0x0200c810,
		 * 0x0200c814,
		 * 0x0100c818,
		 * 0,
		 * 0x83001110,
		 * 0x83001010,
		 * 0,
		 * 0x83001810,
		 */
		0x00001800,
		0x03001810,
		0x03001818,
		0x01001820,
		0x02001830,
	};
	/* Iterate by element count rather than an in-band terminator value. */
	const unsigned int count = sizeof regs / sizeof regs[0];
	unsigned int i;

	(void)argc;
	(void)argv;

	for (i = 0; i < count; i++) {
		if (regs[i] == 0) {
			printf("\n");
			continue;
		}
		print_reg_word(regs[i]);
	}
	return 0;
}
# ./reg
REG: 0 FUN: 0 DEV: 3 BUS: 0 ADD: 0 EXT: 0
REG: 10 FUN: 0 DEV: 3 BUS: 0 ADD: 3 EXT: 0
REG: 18 FUN: 0 DEV: 3 BUS: 0 ADD: 3 EXT: 0
REG: 20 FUN: 0 DEV: 3 BUS: 0 ADD: 1 EXT: 0
REG: 30 FUN: 0 DEV: 3 BUS: 0 ADD: 2 EXT: 0
最近发现了一个可以解析prtconf输出的工具,非常好用:
http://blogs.sun.com/dmick/entry/prtpci_digest_and_display_prtconf