Ok, today I'll write up the details of the grant table implementation in the Xen split device drivers. Xen's grant table mechanism, much like shared memory in Linux, supports communication
between domains. This write-up is based on the para-drivers file platform-pci/gnttab.c.
Let's begin with the init function.
int __devinit gnttab_init(void)
{
int i;
unsigned int max_nr_glist_frames;
unsigned int nr_init_grefs;
if (!is_running_on_xen()) //judge whether it's on xen
return -ENODEV;
nr_grant_frames = 1; //the number of page permit grant.
//set the default value to 1
boot_max_nr_grant_frames = __max_nr_grant_frames(); //get the max number of pages
//should be granted, take closer look in __max_nr_grant_frames()
//here is breakpoint 1
/* Ask the hypervisor how many grant-table frames this domain may use.
 * Falls back to the legacy limit of 4 frames when the query operation
 * is unsupported or fails. */
static unsigned int __max_nr_grant_frames(void)
{
struct gnttab_query_size query;
int err;

query.dom = DOMID_SELF;
err = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);

/* old hypervisors lack GNTTABOP_query_size: assume the legacy max */
if (err < 0 || query.status != GNTST_okay)
return 4;

return query.max_nr_frames;
}
This is a common operation on the grant table. How many types of operations does the grant table support? I'll list them below.
First, mapping a page from one domain into another.
/*
 * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
 * by devices and/or host CPUs. If successful, <handle> is a tracking number
 * that must be presented later to destroy the mapping(s). On error,
 * <handle> is a negative status code.
 * NOTES:
 * 1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
 * via which I/O devices may access the granted frame.
 * 2. If GNTMAP_host_map is specified then a mapping will be added at
 * either a host virtual address in the current address space, or at
 * a PTE at the specified machine address. The type of mapping to
 * perform is selected through the GNTMAP_contains_pte flag, and the
 * address is specified in <host_addr>.
 * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
 * host mapping is destroyed by other means then it is *NOT* guaranteed
 * to be accounted to the correct grant reference!
 */
#define GNTTABOP_map_grant_ref 0
struct gnttab_map_grant_ref {
/* IN parameters. */
uint64_t host_addr; /* host VA, or PTE machine address if GNTMAP_contains_pte */
uint32_t flags; /* GNTMAP_* */
grant_ref_t ref; /* grant reference to map */
domid_t dom; /* granting domain */
/* OUT parameters. */
int16_t status; /* GNTST_* */
grant_handle_t handle; /* tracking number, presented later to unmap */
uint64_t dev_bus_addr; /* device-access address when GNTMAP_device_map set */
};
typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t);
/*
 * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
 * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
 * field is ignored. If non-zero, they must refer to a device/host mapping
 * that is tracked by <handle>.
 * NOTES:
 * 1. The call may fail in an undefined manner if either mapping is not
 * tracked by <handle>.
 * 3. After executing a batch of unmaps, it is guaranteed that no stale
 * mappings will remain in the device or host TLBs.
 */
#define GNTTABOP_unmap_grant_ref 1
struct gnttab_unmap_grant_ref {
/* IN parameters. */
uint64_t host_addr; /* host mapping to destroy (0 = ignore) */
uint64_t dev_bus_addr; /* device mapping to destroy (0 = ignore) */
grant_handle_t handle; /* tracking number from GNTTABOP_map_grant_ref */
/* OUT parameters. */
int16_t status; /* GNTST_* */
};
typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t);
/*
 * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
 * <nr_frames> pages. The frame addresses are written to the <frame_list>.
 * Only <nr_frames> addresses are written, even if the table is larger.
 * NOTES:
 * 1. <dom> may be specified as DOMID_SELF.
 * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
 * 3. Xen may not support more than a single grant-table page per domain.
 */
#define GNTTABOP_setup_table 2
struct gnttab_setup_table {
/* IN parameters. */
domid_t dom; /* domain to set up; DOMID_SELF unless privileged */
uint32_t nr_frames; /* minimum number of grant-table pages wanted */
/* OUT parameters. */
int16_t status; /* GNTST_* */
XEN_GUEST_HANDLE(ulong) frame_list; /* Xen writes nr_frames frame addresses here */
};
typedef struct gnttab_setup_table gnttab_setup_table_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t);
/*
* GNTTABOP_dump_table: Dump the contents of the grant table to the
* xen console. Debugging use only.
*/
#define GNTTABOP_dump_table 3
struct gnttab_dump_table {
/* IN parameters. */
domid_t dom; /* domain whose grant table should be dumped */
/* OUT parameters. */
int16_t status; /* GNTST_* */
};
typedef struct gnttab_dump_table gnttab_dump_table_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
/*
 * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The
 * foreign domain has previously registered its interest in the transfer via
 * <domid, ref>.
 *
 * Note that, even if the transfer fails, the specified page no longer belongs
 * to the calling domain *unless* the error is GNTST_bad_page.
 */
#define GNTTABOP_transfer 4
struct gnttab_transfer {
/* IN parameters. */
xen_pfn_t mfn; /* machine frame being handed over */
domid_t domid; /* receiving domain */
grant_ref_t ref; /* receiver's grant entry (GTF_accept_transfer) */
/* OUT parameters. */
int16_t status;
};
typedef struct gnttab_transfer gnttab_transfer_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
/*
 * GNTTABOP_copy: Hypervisor-based copy.
 * Source and destination can be either MFNs or, for foreign domains,
 * grant references. The foreign domain has to grant read/write access
 * in its grant table.
 *
 * The flags specify what type the source and destination are (either MFN
 * or grant reference).
 *
 * Note that this can also be used to copy data between two domains
 * via a third party if the source and destination domains have previously
 * granted appropriate access to their pages to the third party.
 *
 * source_offset specifies an offset in the source frame, dest_offset
 * the offset in the target frame and len specifies the number of
 * bytes to be copied.
 */
/* flag bits: whether source/dest are grant references rather than GMFNs */
#define _GNTCOPY_source_gref (0)
#define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref)
#define _GNTCOPY_dest_gref (1)
#define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref)
#define GNTTABOP_copy 5
typedef struct gnttab_copy {
/* IN parameters. */
struct {
union {
grant_ref_t ref; /* used when the matching GNTCOPY_*_gref flag is set */
xen_pfn_t gmfn; /* used otherwise */
} u;
domid_t domid;
uint16_t offset; /* byte offset within the frame */
} source, dest;
uint16_t len; /* number of bytes to copy */
uint16_t flags; /* GNTCOPY_* */
/* OUT parameters. */
int16_t status;
} gnttab_copy_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t);
/*
 * GNTTABOP_query_size: Query the current and maximum sizes of the shared
 * grant table.
 * NOTES:
 * 1. <dom> may be specified as DOMID_SELF.
 * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
 */
#define GNTTABOP_query_size 6
struct gnttab_query_size {
/* IN parameters. */
domid_t dom; /* DOMID_SELF, or another domain if privileged */
/* OUT parameters. */
uint32_t nr_frames; /* current number of grant-table frames */
uint32_t max_nr_frames; /* maximum number allowed */
int16_t status; /* GNTST_* */
};
typedef struct gnttab_query_size gnttab_query_size_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
/*
 * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
 * tracked by <handle> but atomically replace the page table entry with one
 * pointing to the machine address under <new_addr>. <new_addr> will be
 * redirected to the null entry.
 * NOTES:
 * 1. The call may fail in an undefined manner if either mapping is not
 * tracked by <handle>.
 * 2. After executing a batch of unmaps, it is guaranteed that no stale
 * mappings will remain in the device or host TLBs.
 */
#define GNTTABOP_unmap_and_replace 7
struct gnttab_unmap_and_replace {
/* IN parameters. */
uint64_t host_addr; /* host mapping to destroy */
uint64_t new_addr; /* PTE here atomically replaces the old mapping */
grant_handle_t handle; /* tracking number of the mapping */
/* OUT parameters. */
int16_t status; /* GNTST_* */
};
typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t;
DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t);
We'll meet these operations in the following code. Now let's return to the init function at breakpoint 1.
Continuing from breakpoint 1:
/* Determine the maximum number of frames required for the
* grant reference free list on the current hypervisor.
*/
max_nr_glist_frames = (boot_max_nr_grant_frames *
GREFS_PER_GRANT_FRAME /
(PAGE_SIZE / sizeof(grant_ref_t)));
//malloc for gnttab_list, it's an int pointer-to-pointer of grant reference
gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
GFP_KERNEL);
if (gnttab_list == NULL)
return -ENOMEM;
//This loop get the physical memory page's virtual address
//(from guest os memory space) & store them
//in the gnttab_list
for (i = 0; i < nr_grant_frames; i++) {
gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
if (gnttab_list[i] == NULL)
goto ini_nomem;
}
//this is breakpoint 2, let's analyse the below function slowly
if (gnttab_resume() < 0)
return -ENODEV;
#ifdef CONFIG_XEN
int gnttab_resume(void)
{
//sanity check: the hypervisor's current maximum must still be able to
//hold the frames we already use, otherwise we cannot resume
if (max_nr_grant_frames() < nr_grant_frames)
return -ENOSYS;
//re-establish the mapping for every grant frame currently in use
return gnttab_map(0, nr_grant_frames - 1);
}
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
struct gnttab_setup_table setup; //IN: dom, nr_frames; OUT: status, frame_list
unsigned long *frames;
unsigned int nr_gframes = end_idx + 1;
int rc;
//scratch buffer handed to the hypervisor: on success Xen writes the
//frame number of every grant-table page into this array
frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
if (!frames)
return -ENOMEM;
setup.dom = DOMID_SELF;
setup.nr_frames = nr_gframes;
//set_xen_guest_handle(hnd, val) just does (hnd).p = val, i.e. it points
//the guest handle at our frames buffer so Xen can fill it in
set_xen_guest_handle(setup.frame_list, frames);
//hypercall: (re)build the grant table and report its frame numbers
rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
if (rc == -ENOSYS) {
kfree(frames);
return -ENOSYS;
}
BUG_ON(rc || setup.status);
//'shared' (static struct grant_entry *shared) is this guest's virtual
//view of the packed grant-entry array shared with Xen; struct
//grant_entry is { uint16_t flags; domid_t domid; uint32_t frame; }
#ifndef __ia64__
if (shared == NULL) {
struct vm_struct *area;
//reserve kernel VA space for the table at its maximum possible
//size, so later expansion never needs to move 'shared'
area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
BUG_ON(area == NULL);
shared = area->addr;
}
//wire the frames returned by Xen into the reserved VA range;
//map_pte_fn advances the frames pointer as it consumes entries
rc = apply_to_page_range(&init_mm, (unsigned long)shared,
PAGE_SIZE * nr_gframes,
map_pte_fn, &frames);
BUG_ON(rc);
frames -= nr_gframes; /* adjust after map_pte_fn() so kfree() gets the original pointer */
#else
//ia64: the grant frames are ordinary pseudo-physical pages, so the
//table's virtual address is simply __va() of the first frame
shared = __va(frames[0] << PAGE_SHIFT);
#endif
kfree(frames);
return 0;
}
#else /* !CONFIG_XEN: in the standalone para-driver package,
* these functions are used instead
*/
int gnttab_resume(void)
{
unsigned int max_nr_gframes, nr_gframes;
nr_gframes = nr_grant_frames;
max_nr_gframes = max_nr_grant_frames();
if (max_nr_gframes < nr_gframes)
return -ENOSYS;
if (!resume_frames) {
resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
//what's ioremap function? generally speaking it just map page from device
//physical memory to kernel memory and can be managed by mmu, return the virtual
//address, so, here shared store the virtual address of resume_frames, but I
//can't understand this operation at here will map what from where to where???
shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes);
if (shared == NULL) {
printk("error to ioremap gnttab share frames\n");
return -1;
}
}
gnttab_map(0, nr_gframes - 1);
return 0;
}
[gnttab_resume--->alloc_xen_mmio]
/*
 * Hand out 'len' bytes from the platform PCI device's MMIO region.
 * platform_mmio is the base of BAR 1 (pci_resource_start(pdev, 1))
 * and platform_mmio_alloc is a simple bump allocator over that window.
 */
unsigned long alloc_xen_mmio(unsigned long len)
{
unsigned long mmio_addr;

mmio_addr = platform_mmio + platform_mmio_alloc;
platform_mmio_alloc += len;
/* running past the end of the BAR is a driver bug */
BUG_ON(platform_mmio_alloc > platform_mmiolen);

return mmio_addr;
}
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
//hyper call struct
/*
* Sets the GPFN at which a particular page appears in the specified guest's
* pseudo physical address space.
* arg == addr of xen_add_to_physmap_t.
*/
* #define XENMEM_add_to_physmap 7
struct xen_add_to_physmap {
/* Which domain to change the mapping for. */
domid_t domid;
/* Source mapping space. */
#define XENMAPSPACE_shared_info 0 /* shared info page */
#define XENMAPSPACE_grant_table 1 /* grant table page */
unsigned int space;
/* Index into source mapping space. */
xen_ulong_t idx;
/* GPFN where the source mapping page should appear. */
xen_pfn_t gpfn;
};
struct xen_add_to_physmap xatp;
unsigned int i = end_idx;
/* Loop backwards, so that the first hypercall has the largest index,
* ensuring that the table will grow only once.
*/
do {
xatp.domid = DOMID_SELF;
xatp.idx = i;
xatp.space = XENMAPSPACE_grant_table;
xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
//hyper call to add the page of shared array to xen memory space???
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
BUG();
} while (i-- > start_idx);
return 0;
}
from breakpoint 2:
................
nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
//#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t))
//this macro means the number of entry items per page can contain
//here calculate there are how many grant entry items in total
//#define NR_RESERVED_ENTRIES 8
//the first 8 entries are reserved, so jump them
//#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
//this macro means the number of grant_ref_t per page contain
//#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
//locate the target entry.initialize the gnttab_list, from above we know
//gnttab_list is an pointer-to-pointer, the first dimension has been initialized
//with page address, here initialize it's second dimension with just index.
//but, I can't understand the relation between gnttab_list & shared.
//how to associate with them??????
//while, shared is: static struct grant_entry *shared;
//gnttab_list is: static grant_ref_t **gnttab_list;
for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
gnttab_entry(i) = i + 1;
gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
gnttab_free_head = NR_RESERVED_ENTRIES;
return 0;
ini_nomem:
for (i--; i >= 0; i--)
free_page((unsigned long)gnttab_list[i]);
kfree(gnttab_list);
return -ENOMEM;
}
2. Another function to operate the grant table
/*
 * Reserve 'count' grant references from the free list.  On success the
 * head of the newly claimed chain is stored in *head and 0 is returned;
 * if the free list cannot satisfy the request, -ENOSPC is returned.
 */
int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
{
int first = get_free_entries(count);

if (first < 0)
return -ENOSPC;

*head = first;
return 0;
}
EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
[gnttab_alloc_grant_references--->get_free_entries]
static int get_free_entries(int count)
{
unsigned long flags;
int ref, rc;
grant_ref_t head;
//the free list is shared state; take the list lock with interrupts
//disabled
spin_lock_irqsave(&gnttab_list_lock, flags);
//not enough free entries: try to grow the grant table by the deficit,
//and bail out (unlocking first) if expansion fails
if ((gnttab_free_count < count) &&
((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
spin_unlock_irqrestore(&gnttab_list_lock, flags);
return rc;
}
//the free entries form a singly linked list threaded through
//gnttab_list itself: gnttab_entry(e) holds the index of the next free
//entry (#define gnttab_entry(e) gnttab_list[(e)/RPP][(e)%RPP],
//where RPP = grant_ref_t slots per page).
//claim 'count' entries starting at the current free head...
ref = head = gnttab_free_head; gnttab_free_count -= count;
//...walk to the last claimed entry...
while (count-- > 1)
head = gnttab_entry(head);
//...then detach the chain: the free head becomes the entry after the
//last claimed one, and the claimed chain is terminated
gnttab_free_head = gnttab_entry(head);
gnttab_entry(head) = GNTTAB_LIST_END;
spin_unlock_irqrestore(&gnttab_list_lock, flags);
return ref;
}
[gnttab_alloc_grant_references--->get_free_entries
--->gnttab_expand]
static int gnttab_expand(unsigned int req_entries)
{
int rc;
unsigned int cur, extra;
cur = nr_grant_frames;
extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
GREFS_PER_GRANT_FRAME);
if (cur + extra > max_nr_grant_frames())
return -ENOSPC;
//this function gnttab_map has already analysed above.
if ((rc = gnttab_map(cur, cur + extra - 1)) == 0)
rc = grow_gnttab_list(extra);
return rc;
}
[gnttab_alloc_grant_references--->get_free_entries
--->gnttab_expand--->grow_gnttab_list]
static int grow_gnttab_list(unsigned int more_frames)
{
unsigned int new_nr_grant_frames, extra_entries, i;
new_nr_grant_frames = nr_grant_frames + more_frames;
extra_entries = more_frames * GREFS_PER_GRANT_FRAME;
//allocate one tracking page per new grant frame (GFP_ATOMIC: this is
//reached from get_free_entries() with the list lock held)
for (i = nr_grant_frames; i < new_nr_grant_frames; i++)
{
gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
if (!gnttab_list[i])
goto grow_nomem;
}
//chain every new entry to its successor...
for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
gnttab_entry(i) = i + 1;
//...and splice the new chain in front of the existing free list
gnttab_entry(i) = gnttab_free_head;
gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
gnttab_free_count += extra_entries;
nr_grant_frames = new_nr_grant_frames;
//entries became available: run any pending free callbacks (presumably
//those registered via gnttab_request_free_callback() — see
//blkif_queue_request)
check_free_callbacks();
return 0;
grow_nomem:
//free the pages allocated so far; NOTE(review): the failed slot is
//NULL, so this relies on free_page(0) being a no-op — confirm
for ( ; i >= nr_grant_frames; i--)
free_page((unsigned long) gnttab_list[i]);
return -ENOMEM;
}
=====call trace of gnttab_grant_foreign_access=====
/*
 * Public grant-issuing interface functions
 */
//grant domain 'domid' access to machine frame 'frame'; returns the new
//grant reference on success or -ENOSPC if no free entry is available
int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
int readonly)
{
int ref;
//get free ref
if (unlikely((ref = get_free_entry()) < 0))
return -ENOSPC;
shared[ref].frame = frame;
shared[ref].domid = domid;
//write barrier: frame/domid must be visible before the flags word
//marks the entry live for Xen and the remote domain
wmb();
shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
return ref;
/*the only caller found in the para-driver code is xenbus_grant_ring()
*in xenbus/xenbus_client.c
*/
}
/*
 * Grant the remote end of 'dev' read/write access to the ring page
 * 'ring_mfn'.  Returns the grant reference on success, or a negative
 * errno (also reported via xenbus_dev_fatal) on failure.
 */
int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
{
int gref = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);

if (gref < 0)
xenbus_dev_fatal(dev, gref, "granting access to ring page");

return gref;
}
EXPORT_SYMBOL_GPL(xenbus_grant_ring);
//while, who will call xenbus_grant_ring?
//in the blkfront.c --->setup_blkring & netfront.c --->setup_device, to setup the shared ring //buffer.
//./drivers/blkfront/blkfront.c
//off course, some structs of ring should be stated at first.
/* Shared ring entry */ \
union __name##_sring_entry { \
__req_t req; \
__rsp_t rsp; \
}; \
\
/* Shared ring page */ \
struct __name##_sring { \
RING_IDX req_prod, req_event; \
RING_IDX rsp_prod, rsp_event; \
uint8_t pad[48]; \
union __name##_sring_entry ring[1]; /* variable-length */ \
}; \
\
/* "Front" end's private variables */ \
struct __name##_front_ring { \
RING_IDX req_prod_pvt; \
RING_IDX rsp_cons; \
unsigned int nr_ents; \
struct __name##_sring *sring; \
}; \
typedef struct __name##_sring __name##_sring_t; \
typedef struct __name##_front_ring __name##_front_ring_t; \
/* Initialising empty rings */
//producer and consumer indices start equal (ring empty); the *_event
//fields start at 1 so the very first request/response produced will
//cross the event threshold and trigger a notification
#define SHARED_RING_INIT(_s) do { \
(_s)->req_prod = (_s)->rsp_prod = 0; \
(_s)->req_event = (_s)->rsp_event = 1; \
(void)memset((_s)->pad, 0, sizeof((_s)->pad)); \
} while(0)
//attach the front end's private state to a freshly initialised shared
//ring: no requests produced, no responses consumed, and nr_ents derived
//from how many entries fit in __size bytes
#define FRONT_RING_INIT(_r, _s, __size) do { \
(_r)->req_prod_pvt = 0; \
(_r)->rsp_cons = 0; \
(_r)->nr_ents = __RING_SIZE(_s, __size); \
(_r)->sring = (_s); \
} while (0)
static int setup_blkring(struct xenbus_device *dev,
struct blkfront_info *info)
{
blkif_sring_t *sring;
int err;
info->ring_ref = GRANT_INVALID_REF;
//the shared ring occupies exactly one page
sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
if (!sring) {
xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
return -ENOMEM;
}
//initialise the shared page, then the front end's private view of it
SHARED_RING_INIT(sring);
FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
//grant the backend access to the ring page; virt_to_mfn() converts
//the page's virtual address to its machine frame number
err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
if (err < 0) {
free_page((unsigned long)sring);
info->ring.sring = NULL;
goto fail;
}
//on success xenbus_grant_ring() returned the grant reference
info->ring_ref = err;
//create an unbound event channel and install blkif_int as the
//interrupt handler for it
err = bind_listening_port_to_irqhandler(
dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
if (err <= 0) {
xenbus_dev_fatal(dev, err,
"bind_listening_port_to_irqhandler");
goto fail;
}
info->irq = err;
return 0;
fail:
blkif_free(info, 0);
return err;
}
=====call trace of gnttab_grant_foreign_access_ref=====
//variant used when the caller already owns a grant reference:
//just populate the shared grant entry for it
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
unsigned long frame, int readonly)
{
shared[ref].frame = frame;
shared[ref].domid = domid;
//write barrier: frame/domid must be visible before the flags word
//marks the entry live for Xen and the remote domain
wmb();
shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
./drivers/blkfront/vbd.c
xlvbd_init_blk_queue--->blk_init_queue
...
rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
if (rq == NULL)
return -1;
...
blk_init_queue--->do_blkif_request--->blkif_queue_request
/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 * virtual address in the guest os.
 */
static int blkif_queue_request(struct request *req)
{
//convert req to blkfront_info
struct blkfront_info *info = compat_req_to_info(req);
unsigned long buffer_mfn;
blkif_request_t *ring_req;
struct bio *bio;
struct bio_vec *bvec;
int idx;
unsigned long id;
unsigned int fsect, lsect;
int ref;
grant_ref_t gref_head;
if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
return 1;
//reserve enough grant references for the worst case (one per segment);
//if the free list is exhausted, register a callback so the queue is
//restarted once references are released, and back off for now.
//(fix: the copy of gnttab_alloc_grant_references() the original
//article pasted inline here broke the function body; it is kept
//below purely as a comment)
//[blkif_queue_request--->gnttab_alloc_grant_references]
//int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
//{
//	int h = get_free_entries(count);
//	if (h < 0)
//		return -ENOSPC;
//	*head = h;
//	return 0;
//}
if (gnttab_alloc_grant_references(
BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
gnttab_request_free_callback(
&info->callback,
blkif_restart_queue_callback,
info,
BLKIF_MAX_SEGMENTS_PER_REQUEST);
return 1;
}
/* Fill out a communications ring structure. */
ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
id = GET_ID_FROM_FREELIST(info);
info->shadow[id].request = (unsigned long)req;
ring_req->id = id;
ring_req->sector_number = (blkif_sector_t) compat_get_sector_number(req);
ring_req->handle = info->handle;
ring_req->operation = rq_data_dir(req) ?
BLKIF_OP_WRITE : BLKIF_OP_READ;
if (blk_barrier_rq(req))
ring_req->operation = BLKIF_OP_WRITE_BARRIER;
ring_req->nr_segments = 0;
rq_for_each_bio (bio, req) {
bio_for_each_segment (bvec, bio, idx) {
BUG_ON(ring_req->nr_segments
== BLKIF_MAX_SEGMENTS_PER_REQUEST);
//grant the backend access to the page backing this bio
//segment so it can perform the actual I/O on it
buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT;
//sector bounds of the segment within the page (512-byte units)
fsect = bvec->bv_offset >> 9;
lsect = fsect + (bvec->bv_len >> 9) - 1;
/* install a grant reference: pop one entry off the chain
 * reserved above (cannot fail, hence the BUG_ON) */
ref = gnttab_claim_grant_reference(&gref_head);
BUG_ON(ref == -ENOSPC);
gnttab_grant_foreign_access_ref(
ref,
info->xbdev->otherend_id,
buffer_mfn,
rq_data_dir(req) );
//remember the granted frame so the request can be reissued
//after suspend/resume
info->shadow[id].frame[ring_req->nr_segments] =
mfn_to_pfn(buffer_mfn);
ring_req->seg[ring_req->nr_segments] =
(struct blkif_request_segment) {
.gref = ref,
.first_sect = fsect,
.last_sect = lsect };
ring_req->nr_segments++;
}
}
info->ring.req_prod_pvt++;
/* Keep a private copy so we can reissue requests when recovering. */
info->shadow[id].req = *ring_req;
//return any reserved-but-unclaimed grant references to the free list
gnttab_free_grant_references(gref_head);
return 0;
}
==========trace of gnttab_end_foreign_access===========
//revoke grant 'ref'; only reclaim the reference (and optionally free
//the backing page) if the remote domain no longer has it mapped
void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
unsigned long page)
{
if (gnttab_end_foreign_access_ref(ref, readonly)) {
put_free_entry(ref);
if (page != 0)
free_page(page);
} else {
/* XXX This needs to be fixed so that the ref and page are
placed on a list to be freed up later. */
printk(KERN_DEBUG
"WARNING: leaking g.e. and page still in use!\n");
}
}
[gnttab_end_foreign_access--->gnttab_end_foreign_access_ref]
//clear the grant entry's flags so the peer can no longer map the frame.
//Returns 1 on success, 0 if the entry is still in use.
int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
{
u16 flags, nflags;
nflags = shared[ref].flags;
do {
//GTF_reading/GTF_writing set means the remote domain still has
//the frame mapped: refuse to end the access
if ((flags = nflags) & (GTF_reading|GTF_writing)) {
printk(KERN_DEBUG "WARNING: g.e. still in use!\n");
return 0;
}
//atomically swap the flags to 0; retry if they changed between
//our read and the cmpxchg
} while ((nflags = synch_cmpxchg_subword(&shared[ref].flags, flags, 0)) !=
flags);
return 1;
}
[gnttab_end_foreign_access--->put_free_entry]
//free this grant ref: push it back onto the head of the free list
static void put_free_entry(grant_ref_t ref)
{
unsigned long flags;
spin_lock_irqsave(&gnttab_list_lock, flags);
//link the released entry in front of the current free head
gnttab_entry(ref) = gnttab_free_head;
gnttab_free_head = ref;
gnttab_free_count++;
//an entry became available: run any pending free callbacks
check_free_callbacks();
spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
call trace
[setup_blkring--->blkif_int--->blkif_completion--->gnttab_end_foreign_access]
blkif_int is an interrupt handler: it is the function that processes block device interrupts. So how is this handler established and installed?
...
err = bind_listening_port_to_irqhandler(
dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
...
[setup_blkring--->bind_listening_port_to_irqhandler]
/*
 * Allocate a Xen "irq", create an unbound event channel that
 * 'remote_domain' may later bind to, and install 'handler' for it.
 * Returns the irq number on success or a negative errno on failure.
 * NOTE(review): irqflags and devname are accepted but unused here.
 */
int bind_listening_port_to_irqhandler(
unsigned int remote_domain,
irqreturn_t (*handler)(int, void *, struct pt_regs *),
unsigned long irqflags,
const char *devname,
void *dev_id)
{
struct evtchn_alloc_unbound alloc_unbound;
int err, irq;
irq = alloc_xen_irq();
if (irq < 0)
return irq;
spin_lock_irq(&irq_evtchn[irq].lock);
//ask Xen for a fresh event-channel port on this domain that the
//remote domain is allowed to bind to
alloc_unbound.dom = DOMID_SELF;
alloc_unbound.remote_dom = remote_domain;
err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
&alloc_unbound);
if (err) {
spin_unlock_irq(&irq_evtchn[irq].lock);
free_xen_irq(irq);
return err;
}
//wire the new port to the irq and record the handler
irq_evtchn[irq].handler = handler;
irq_evtchn[irq].dev_id = dev_id;
irq_evtchn[irq].evtchn = alloc_unbound.port;
irq_evtchn[irq].close = 1;
evtchn_to_irq[alloc_unbound.port] = irq;
//allow events on this port to be delivered
unmask_evtchn(alloc_unbound.port);
spin_unlock_irq(&irq_evtchn[irq].lock);
return irq;
}