The layer of an block device operation:
figure 1 the layer of block operation
figure 2 struce relationship
some data units in the block operation
section:
1.gendisk
include/linux/genhd.h(based on 2.6.31.13)
/*
A disk is a logical block device that is handled by the generic block layer. Usually a disk
corresponds to a hardware block device such as a hard disk, a floppy disk, or a CD-ROM disk
However, a disk can be a virtual device built upon several physical disk partitions, or a storage area living in some dedicated pages of RAM. In any case, the upper kernel components operate on all disks in the same way thanks to the services offered by the generic block layer.
*/
struct gendisk {
/* major, first_minor and minors are input parameters only,
* don't use directly. Use disk_devt() and disk_max_parts().
*/
int major; /* major number of driver */
int first_minor;
int minors; /* maximum number of minors, =1 for
* disks that can't be partitioned. */
char disk_name[DISK_NAME_LEN]; /* name of major driver */
char *(*devnode)(struct gendisk *gd, mode_t *mode);
/* Array of pointers to partitions indexed by partno.
* Protected with matching bdev lock but stat and other
* non-critical accesses use RCU. Always access through
* helpers.
*/
struct disk_part_tbl *part_tbl;
struct hd_struct part0;
const struct block_device_operations *fops;
struct request_queue *queue;
void *private_data;
int flags;
struct device *driverfs_dev; // FIXME: remove
struct kobject *slave_dir;
struct timer_rand_state *random;
atomic_t sync_io; /* RAID */
struct work_struct async_notify;
#ifdef CONFIG_BLK_DEV_INTEGRITY
struct blk_integrity *integrity;
#endif
int node_id;
};
include/linux/blkdev.h
struct request_queue
{
/*
* Together with queue_head for cacheline sharing
*/
struct list_head queue_head;
struct request *last_merge;
struct elevator_queue *elevator;
/*
* the queue request freelist, one for reads and one for writes
*/
struct request_list rq;
request_fn_proc *request_fn;
make_request_fn *make_request_fn;
prep_rq_fn *prep_rq_fn;
unplug_fn *unplug_fn;
merge_bvec_fn *merge_bvec_fn;
prepare_flush_fn *prepare_flush_fn;
softirq_done_fn *softirq_done_fn;
rq_timed_out_fn *rq_timed_out_fn;
dma_drain_needed_fn *dma_drain_needed;
lld_busy_fn *lld_busy_fn;
/*
* Dispatch queue sorting
*/
sector_t end_sector;
struct request *boundary_rq;
/*
* Auto-unplugging state
*/
struct timer_list unplug_timer;
int unplug_thresh; /* After this many requests */
unsigned long unplug_delay; /* After this many jiffies */
struct work_struct unplug_work;
struct backing_dev_info backing_dev_info;
/*
* The queue owner gets to use this for whatever they like.
* ll_rw_blk doesn't touch it.
*/
void *queuedata;
/*
* queue needs bounce pages for pages above this limit
*/
gfp_t bounce_gfp;
/*
* various queue flags, see QUEUE_* below
*/
unsigned long queue_flags;
/*
* protects queue structures from reentrancy. ->__queue_lock should
* _never_ be used directly, it is queue private. always use
* ->queue_lock.
*/
spinlock_t __queue_lock;
spinlock_t *queue_lock;
/*
* queue kobject
*/
struct kobject kobj;
/*
* queue settings
*/
unsigned long nr_requests; /* Max # of requests */
unsigned int nr_congestion_on;
unsigned int nr_congestion_off;
unsigned int nr_batching;
unsigned int rq_timeout;
struct timer_list timeout;
struct list_head timeout_list;
struct queue_limits limits;
/*
* sg stuff
*/
unsigned int sg_timeout;
unsigned int sg_reserved_size;
int node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
struct blk_trace *blk_trace;
#endif
/*
* reserved for flush operations
*/
unsigned int ordered, next_ordered, ordseq;
int orderr, ordcolor;
struct request pre_flush_rq, bar_rq, post_flush_rq;
struct request *orig_bar_rq;
struct mutex sysfs_lock;
#if defined(CONFIG_BLK_DEV_BSG)
struct bsg_class_device bsg_dev;
#endif
};
include/linux/blkdev.h
/*
* try to put the fields that are referenced together in the same cacheline.
* if you modify this structure, be sure to check block/blk-core.c:rq_init()
* as well!
*/
struct request {
struct list_head queuelist;
struct call_single_data csd;
int cpu;
struct request_queue *q;
unsigned int cmd_flags;
enum rq_cmd_type_bits cmd_type;
unsigned long atomic_flags;
/* the following two fields are internal, NEVER access directly */
sector_t __sector; /* sector cursor */
unsigned int __data_len; /* total data len */
struct bio *bio;
struct bio *biotail;
struct hlist_node hash; /* merge hash */
/*
* The rb_node is only used inside the io scheduler, requests
* are pruned when moved to the dispatch queue. So let the
* completion_data share space with the rb_node.
*/
union {
struct rb_node rb_node; /* sort/lookup */
void *completion_data;
};
/*
* two pointers are available for the IO schedulers, if they need
* more they have to dynamically allocate it.
*/
void *elevator_private;
void *elevator_private2;
struct gendisk *rq_disk;
unsigned long start_time;
/* Number of scatter-gather DMA addr+len pairs after
* physical address coalescing is performed.
*/
unsigned short nr_phys_segments;
unsigned short ioprio;
void *special; /* opaque pointer available for LLD use */
char *buffer; /* kaddr of the current segment if available */
int tag;
int errors;
int ref_count;
/*
* when request is used as a packet command carrier
*/
unsigned short cmd_len;
unsigned char __cmd[BLK_MAX_CDB];
unsigned char *cmd;
unsigned int extra_len; /* length of alignment and padding */
unsigned int sense_len;
unsigned int resid_len; /* residual count */
void *sense;
unsigned long deadline;
struct list_head timeout_list;
unsigned int timeout;
int retries;
/*
* completion callback.
*/
rq_end_io_fn *end_io;
void *end_io_data;
/* for bidi */
struct request *next_rq;
};
include/linux/bio.h
/*
* main unit of I/O for the block layer and lower layers (ie drivers and
* stacking drivers)
*/
/* The contents of a bio descriptor keep changing during the block I/O operation.For instance,
if the block device driver cannot perform the whole data transfer with one scatter-gather DMA
operation, the bi_idx field is updated to keep track of the first segment in the bio that is
yet to be transferred. To iterate over the segments of a bio starting from the current segment
at index bi_idxa device driver can execute the bio_for_each_segment macro.
When the generic block layer starts a new I/O operation, it allocates a new bio structure by
invoking the bio_alloc( ) function. Usually, bios are allocated through the slab allocator, but the kernel also keeps a small memory pool of bios to be used when memory is scarce (see the section "Memory Pools" in Chapter 8). The kernel also keeps a memory pool for the bio_vec
structuresafter all, it would not make sense to allocate a bio without being able to allocate the segment descriptors to be included in the bio. Correspondingly, the bio_put( ) function
decrements the reference counter (bi_cnt) of a bio and, if the counter becomes zero, it releases
the bio structure and the related bio_vec structures.
*/
struct bio {
sector_t bi_sector; /* device address in 512 byte
sectors */
struct bio *bi_next; /* request queue link */
struct block_device *bi_bdev;
unsigned long bi_flags; /* status, command, etc */
unsigned long bi_rw; /* bottom bits READ/WRITE,
* top bits priority
*/
unsigned short bi_vcnt; /* how many bio_vec's */
unsigned short bi_idx; /* current index into bvl_vec */
/* Number of segments in this BIO after
* physical address coalescing is performed.
*/
unsigned int bi_phys_segments;
unsigned int bi_size; /* residual I/O count */
/*
* To keep track of the max segment size, we account for the
* sizes of the first and last mergeable segments in this bio.
*/
unsigned int bi_seg_front_size;
unsigned int bi_seg_back_size;
unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
unsigned int bi_comp_cpu; /* completion CPU */
atomic_t bi_cnt; /* pin count */
struct bio_vec *bi_io_vec; /* the actual vec list */
bio_end_io_t *bi_end_io;
void *bi_private;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
bio_destructor_t *bi_destructor; /* destructor */
/*
* We can inline a number of vecs at the end of the bio, to avoid
* double allocations for a small number of bio_vecs. This member
* MUST obviously be kept at the very end of the bio.
*/
struct bio_vec bi_inline_vecs[0];
};
/*
* was unsigned short, but we might as well be ready for > 64kB I/O pages
*/
struct bio_vec {
struct page *bv_page;
unsigned int bv_len;
unsigned int bv_offset;
};
//the function of bio: a buffer between block device & fs.
//should to study address_space, address_space_operations, read the code RAMDISK
//read the ULK chapter 15 16