For the paravirtualized (PV) drivers, the platform-pci device acts as a bridge: it establishes the
communication channels (event channels, grant tables, etc.) between domU and dom0, so it is very important.
[platform-pci.c]
/*
 * PCI driver record for the platform device: DRV_NAME is the driver name,
 * platform_pci_init() is the probe callback, and platform_pci_tbl is the
 * device-ID table used for matching.
 *
 * Uses C99 designated initializers (.field = value) instead of the obsolete
 * GNU "field: value" form.
 */
static struct pci_driver platform_driver = {
	.name     = DRV_NAME,
	.probe    = platform_pci_init,
	.id_table = platform_pci_tbl,
};
/*
 * The pci_dev structure is used to describe PCI devices.
 * include/linux/pci.h
 *
 * Quoted here for reference. The driver code in this walkthrough reads
 * ->irq, ->pin, ->devfn, ->bus and ->resource[] from it.
 */
struct pci_dev {
	struct list_head bus_list;	/* node in per-bus list */
	struct pci_bus *bus;		/* bus this device is on */
	struct pci_bus *subordinate;	/* bus this device bridges to */

	void *sysdata;			/* hook for sys-specific extension */
	struct proc_dir_entry *procent;	/* device entry in /proc/bus/pci */
	struct pci_slot *slot;		/* Physical slot this device is in */

	unsigned int devfn;		/* encoded device & function index */
	unsigned short vendor;
	unsigned short device;
	unsigned short subsystem_vendor;
	unsigned short subsystem_device;
	unsigned int class;		/* 3 bytes: (base,sub,prog-if) */
	u8 revision;			/* PCI revision, low byte of class word */
	u8 hdr_type;			/* PCI header type (`multi' flag masked out) */
	u8 pcie_type;			/* PCI-E device/port type */
	u8 rom_base_reg;		/* which config register controls the ROM */
	u8 pin;				/* which interrupt pin this device uses */

	struct pci_driver *driver;	/* which driver has allocated this device */
	u64 dma_mask;			/* Mask of the bits of bus address this
					   device implements. Normally this is
					   0xffffffff. You only need to change
					   this if your device has broken DMA
					   or supports 64-bit transfers. */

	struct device_dma_parameters dma_parms;

	pci_power_t current_state;	/* Current operating state. In ACPI-speak,
					   this is D0-D3, D0 being fully functional,
					   and D3 being off. */
	int pm_cap;			/* PM capability offset in the
					   configuration space */
	unsigned int pme_support:5;	/* Bitmask of states from which PME#
					   can be generated */
	unsigned int d1_support:1;	/* Low power state D1 is supported */
	unsigned int d2_support:1;	/* Low power state D2 is supported */
	unsigned int no_d1d2:1;		/* Only allow D0 and D3 */
	unsigned int wakeup_prepared:1;

#ifdef CONFIG_PCIEASPM
	struct pcie_link_state *link_state;	/* ASPM link state. */
#endif

	pci_channel_state_t error_state;	/* current connectivity state */
	struct device dev;		/* Generic device interface */

	int cfg_size;			/* Size of configuration space */

	/*
	 * Instead of touching interrupt line and base address registers
	 * directly, use the values stored here. They might be different!
	 */
	unsigned int irq;
	struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */

	/* These fields are used by common fixups */
	unsigned int transparent:1;	/* Transparent PCI bridge */
	unsigned int multifunction:1;	/* Part of multi-function device */
	/* keep track of device state */
	unsigned int is_added:1;
	unsigned int is_busmaster:1;	/* device is busmaster */
	unsigned int no_msi:1;		/* device may not use msi */
	unsigned int block_ucfg_access:1;	/* userspace config space access is blocked */
	unsigned int broken_parity_status:1;	/* Device generates false positive parity */
	unsigned int irq_reroute_variant:2;	/* device needs IRQ rerouting variant */
	unsigned int msi_enabled:1;
	unsigned int msix_enabled:1;
	unsigned int ari_enabled:1;	/* ARI forwarding */
	unsigned int is_managed:1;
	unsigned int is_pcie:1;
	unsigned int needs_freset:1;	/* Dev requires fundamental reset */
	unsigned int state_saved:1;
	unsigned int is_physfn:1;
	unsigned int is_virtfn:1;
	unsigned int reset_fn:1;
	unsigned int is_hotplug_bridge:1;
	pci_dev_flags_t dev_flags;
	atomic_t enable_cnt;		/* pci_enable_device has been called */

	u32 saved_config_space[16];	/* config space saved at suspend time */
	struct hlist_head saved_cap_space;
	struct bin_attribute *rom_attr;	/* attribute descriptor for sysfs ROM entry */
	int rom_attr_enabled;		/* has display of the rom attribute been enabled? */
	struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
#ifdef CONFIG_PCI_MSI
	struct list_head msi_list;
#endif
	struct pci_vpd *vpd;
#ifdef CONFIG_PCI_IOV
	union {
		struct pci_sriov *sriov;	/* SR-IOV capability related */
		struct pci_dev *physfn;		/* the PF this VF is associated with */
	};
	struct pci_ats *ats;		/* Address Translation Service */
#endif
};	/* the terminating ';' is required: without it the next declaration fails to parse */
static int __devinit platform_pci_init(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
int i, ret;
long ioaddr, iolen;
long mmio_addr, mmio_len;
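//check whether a platform device has already been probed:
//xen_platform_pdev is a global pointer to a pci_dev (struct pci_dev *xen_platform_pdev;),
//so a non-NULL value means the driver is already bound and we return -EBUSY.
//See the struct pci_dev definition above for the details of pci_dev.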
if (xen_platform_pdev)
return -EBUSY;
xen_platform_pdev = pdev;
/**
* pci_enable_device - Initialize device before it's used by a driver.
* @dev: PCI device to be initialized
*
* Initialize device before it's used by a driver. Ask low-level code
* to enable I/O and memory. Wake up the device if it was suspended.
* Beware, this function can fail.
*
* Note we don't actually enable the device many times if we call
* this function repeatedly (we just increment the count).
* Call chain: pci_enable_device--->__pci_enable_device_flags--->do_pci_enable_device
*/
i = pci_enable_device(pdev);
if (i)
return i;
//include/linux/pci.h
//get the start address of pci_dev's resource
//#define pci_resource_start(dev, bar) ((dev)->resource[(bar)].start)
/*
 * #define pci_resource_len(dev,bar) \
 *	((pci_resource_start((dev), (bar)) == 0 && \
 *	  pci_resource_end((dev), (bar)) == \
 *	  pci_resource_start((dev), (bar))) ? 0 : \
 *	 \
 *	 (pci_resource_end((dev), (bar)) - \
 *	  pci_resource_start((dev), (bar)) + 1))
 */
ioaddr = pci_resource_start(pdev, 0);
iolen = pci_resource_len(pdev, 0);
/*
 * struct resource {
 *	resource_size_t start;
 *	resource_size_t end;
 *	const char *name;
 *	unsigned long flags;
 *	struct resource *parent, *sibling, *child;
 * };
 */
mmio_addr = pci_resource_start(pdev, 1);
mmio_len = pci_resource_len(pdev, 1);
//I do not yet understand what this value means
//breakpoint 1
callback_via = get_callback_via(pdev);
[platform_pci_init--->get_callback_via]
/*
 * Build the "callback via" value for the platform device.
 *
 * If the device's interrupt is an ISA IRQ (below 16), that IRQ number is
 * returned as-is. Otherwise the PCI INTx line is described by packing:
 *   bits 56+ : HVMIRQ_callback_pci_intx - 1
 *   bits 32+ : PCI domain number of the device's bus
 *   bits 16+ : bus number
 *   bits  8+ : devfn (low 8 bits)
 *   bits 0-1 : interrupt pin - 1
 */
static uint64_t get_callback_via(struct pci_dev *pdev)
{
	u8 intx_pin;
	int line;
	uint64_t via;

#ifdef __ia64__
	/* Look for the ISA IRQ whose vector matches the device's irq. */
	for (line = 0; line < 16; line++) {
		if (isa_irq_to_vector(line) == pdev->irq)
			return line; /* ISA IRQ */
	}
#else /* !__ia64__ */
	line = pdev->irq;
	if (line < 16)
		return line; /* ISA IRQ */
#endif

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
	intx_pin = pdev->pin;
#else
	pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &intx_pin);
#endif

	/* We don't know the GSI. Specify the PCI INTx line instead. */
	via  = (uint64_t)(HVMIRQ_callback_pci_intx - 1) << 56;
	via |= (uint64_t)pci_domain_nr(pdev->bus) << 32;
	via |= (uint64_t)pdev->bus->number << 16;
	via |= (uint64_t)(pdev->devfn & 0xff) << 8;
	via |= (uint64_t)(intx_pin - 1) & 3;

	return via;
}
from breakpoint 1:
if (mmio_addr == 0 || ioaddr == 0 || callback_via == 0) {
printk(KERN_WARNING DRV_NAME ":no resources found\n");
return -ENOENT;
}
//include/linux/ioport.h
//#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start),
// (n), (name), 0)
//get the io & mmio resources
if (request_mem_region(mmio_addr, mmio_len, DRV_NAME) == NULL) {
printk(KERN_ERR ":MEM I/O resource 0x%lx @ 0x%lx busy\n",
mmio_addr, mmio_len);
return -EBUSY;
}
if (request_region(ioaddr, iolen, DRV_NAME) == NULL) {
printk(KERN_ERR DRV_NAME ":I/O resource 0x%lx @ 0x%lx busy\n",
iolen, ioaddr);
release_mem_region(mmio_addr, mmio_len);
return -EBUSY;
}
platform_mmio = mmio_addr;
platform_mmiolen = mmio_len;
//initialize the hypercall stubs?
//breakpoint 2 should be read together with the material on hypercall pages (see the book).
//I will cover this topic in another document.
//breakpoint 2
[platform_pci_init--->init_hypercall_stubs]
/*
 * init_hypercall_stubs - detect the Xen VMM through CPUID and set up the
 * hypercall pages.
 *
 * Steps, as performed below:
 *  1. CPUID leaf 0x40000000: ebx/ecx/edx hold a 12-byte signature which must
 *     read "XenVMMXenVMM", and eax must be at least 0x40000002.
 *  2. CPUID leaf 0x40000001: eax holds the Xen version (high 16 bits . low
 *     16 bits), which is only logged.
 *  3. CPUID leaf 0x40000002: eax is the number of hypercall pages, ebx the
 *     MSR to program.
 *  4. Allocate up to that many pages (retrying with fewer on failure) and
 *     write (pfn << PAGE_SHIFT) + page index to the MSR for every page.
 *
 * On success the globals hypercall_stubs, nr_hypercall_stub_pages and
 * max_hypercall_stub_pages are set.
 *
 * Returns 0 on success, -EINVAL when the signature/leaf check fails, and
 * -ENOMEM when no page could be allocated.
 */
static int init_hypercall_stubs(void)
{
	uint32_t eax, ebx, ecx, edx, pages, msr, i;
	/*
	 * The three signature registers are stored through a union rather
	 * than by casting a char buffer to uint32_t *, which avoids the
	 * aliasing/alignment problems of the pointer cast.
	 */
	union {
		uint32_t regs[3];
		char text[13];
	} sig;

	/*
	 * Reference: the cpuid() helper used below (generic CPUID function).
	 * It clears %ecx since some cpus (Cyrix MII) do not set or clear %ecx,
	 * resulting in stale register contents being returned.
	 *
	 *   #define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
	 *   #define XEN_CPUID XEN_EMULATE_PREFIX cpuid
	 *   static inline void cpuid(unsigned int op, unsigned int *eax,
	 *                            unsigned int *ebx, unsigned int *ecx,
	 *                            unsigned int *edx)
	 *   {
	 *           __asm__(XEN_CPUID
	 *                   : "=a" (*eax),
	 *                     "=b" (*ebx),
	 *                     "=c" (*ecx),
	 *                     "=d" (*edx)
	 *                   : "0" (op), "c"(0));
	 *   }
	 */

	/* Fetch the hypervisor signature; it ends up NUL-terminated in sig.text. */
	cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
	sig.regs[0] = ebx;
	sig.regs[1] = ecx;
	sig.regs[2] = edx;
	sig.text[12] = 0;

	if (strcmp("XenVMMXenVMM", sig.text) || (eax < 0x40000002)) {
		printk(KERN_WARNING
		       "Detected Xen platform device but not Xen VMM?"
		       " (sig %s, eax %x)\n",
		       sig.text, eax);
		return -EINVAL;
	}

	/* Get the Xen version. */
	cpuid(0x40000001, &eax, &ebx, &ecx, &edx);
	printk(KERN_INFO "Xen version %d.%d.\n", eax >> 16, eax & 0xffff);

	/*
	 * Find largest supported number of hypercall pages.
	 * We'll create as many as possible up to this number.
	 * (eax returns the page count, ebx the MSR to write.)
	 */
	cpuid(0x40000002, &pages, &msr, &ecx, &edx);

	/*
	 * Use __vmalloc() because vmalloc_exec() is not an exported symbol.
	 * PAGE_KERNEL_EXEC also is not exported, hence we use PAGE_KERNEL.
	 * hypercall_stubs = vmalloc_exec(pages * PAGE_SIZE);
	 */
	while (pages > 0) {
		/*
		 * hypercall_stubs is a global variable that stores the
		 * pointer to the hypercall pages.
		 */
		hypercall_stubs = __vmalloc(
			pages * PAGE_SIZE,
			GFP_KERNEL | __GFP_HIGHMEM,
			__pgprot(__PAGE_KERNEL & ~_PAGE_NX));
		if (hypercall_stubs != NULL)
			break;
		pages--; /* vmalloc failed: try one fewer pages */
	}

	if (hypercall_stubs == NULL)
		return -ENOMEM;

	/* Program the MSR once per page: frame address plus the page index. */
	for (i = 0; i < pages; i++) {
		unsigned long pfn;

		pfn = vmalloc_to_pfn((char *)hypercall_stubs + i*PAGE_SIZE);
		wrmsrl(msr, ((u64)pfn << PAGE_SHIFT) + i);
	}

	nr_hypercall_stub_pages = pages;
	max_hypercall_stub_pages = pages;

	printk(KERN_INFO "Hypercall area is %u pages.\n", pages);

	return 0;
}
//from breakpoint 2
ret = init_hypercall_stubs();
if (ret < 0)
goto out;
//init the xen info, here is breakpoint 3
if ((ret = init_xen_info()))
goto out;
//breakpoint 3
/*
 * init_xen_info - map the domain's shared info page.
 *
 * Reserves one page of the platform device's MMIO space with
 * alloc_xen_mmio(), asks the hypervisor (XENMEM_add_to_physmap,
 * XENMAPSPACE_shared_info) to place the shared info page at that guest
 * frame, then ioremap()s it into shared_info_area.
 *
 * Always returns 0: a failing hypercall hits BUG(), a failing ioremap()
 * panics.
 */
static int __devinit init_xen_info(void)
{
	struct xen_add_to_physmap xatp;
	extern void *shared_info_area;

	/*
	 * Reference: the hypercall argument, quoted from the Xen headers.
	 *
	 *   // Sets the GPFN at which a particular page appears in the
	 *   // specified guest's pseudophysical address space.
	 *   // arg == addr of xen_add_to_physmap_t.
	 *   #define XENMEM_add_to_physmap 7
	 *   struct xen_add_to_physmap {
	 *           // Which domain to change the mapping for.
	 *           domid_t domid;
	 *           // Source mapping space.
	 *   #define XENMAPSPACE_shared_info 0   // shared info page
	 *   #define XENMAPSPACE_grant_table 1   // grant table page
	 *           unsigned int space;
	 *           // Index into source mapping space.
	 *           xen_ulong_t idx;
	 *           // GPFN where the source mapping page should appear.
	 *           xen_pfn_t gpfn;
	 *   };
	 */

#ifdef __ia64__
	xencomm_init();
#endif

	/* Author's note: fills the xen_feature[] array with Xen's version info. */
	setup_xen_features();

	/*
	 * Take one page (as a page frame number) from the PCI device's
	 * memory resource, then issue the hypercall that adds the shared
	 * info page there.
	 *
	 * Author's trace of the hypercall (xen/arch/x86/mm.c):
	 *   do_memory_op--->arch_memory_op--->
	 *   virt_to_mfn(d->shared_info) (the target domain's shared info
	 *   page address, as an mfn)
	 *   --->guest_physmap_add_page--->guest_physmap_add_entry
	 * The result is that the domain's shared info page is mapped into
	 * the PCI device's memory space at shared_info_frame.
	 */
	shared_info_frame = alloc_xen_mmio(PAGE_SIZE) >> PAGE_SHIFT;
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = shared_info_frame;
	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
		BUG();

	/*
	 * Map the shared info page to shared_info_area. This variable is
	 * very important: it holds the domain's shared info page and is
	 * needed when operating on event channels, for example:
	 *
	 *   void mask_evtchn(int port)
	 *   {
	 *           shared_info_t *s = shared_info_area;
	 *           synch_set_bit(port, &s->evtchn_mask[0]);
	 *   }
	 */
	shared_info_area =
		ioremap(shared_info_frame << PAGE_SHIFT, PAGE_SIZE);
	if (shared_info_area == NULL)
		panic("can't map shared info\n");

	return 0;
}
//from breakpoint 3
//init the grant table; the details of the grant table are described in another document
if ((ret = gnttab_init()))
goto out;
//init the event channel
if ((ret = xen_irq_init(pdev)))
goto out;
if ((ret = set_callback_via(callback_via)))
goto out;
if ((ret = xenbus_init()))
goto out;