
Category: LINUX

2011-02-16 11:24:57

The layers of a block device operation:

[Figure 1: the layers of a block I/O operation]

[Figure 2: struct relationships]

Key data structures in the block I/O path:
1. gendisk
include/linux/genhd.h (based on 2.6.31.13)
/*
  A disk is a logical block device that is handled by the generic block layer. Usually a disk
corresponds to a hardware block device such as a hard disk, a floppy disk, or a CD-ROM disc.
However, a disk can be a virtual device built upon several physical disk partitions, or a storage
area living in some dedicated pages of RAM. In any case, the upper kernel components operate on
all disks in the same way thanks to the services offered by the generic block layer.
 */
struct gendisk {
        /* major, first_minor and minors are input parameters only,
         * don't use directly.  Use disk_devt() and disk_max_parts().
         */
        int major;                      /* major number of driver */
        int first_minor;
        int minors;                     /* maximum number of minors, =1 for
                                         * disks that can't be partitioned. */

        char disk_name[DISK_NAME_LEN];  /* name of major driver */
        char *(*devnode)(struct gendisk *gd, mode_t *mode);
        /* Array of pointers to partitions indexed by partno.
         * Protected with matching bdev lock but stat and other
         * non-critical accesses use RCU.  Always access through
         * helpers.
         */
        struct disk_part_tbl *part_tbl;
        struct hd_struct part0;

        const struct block_device_operations *fops;
        struct request_queue *queue;
        void *private_data;

        int flags;
        struct device *driverfs_dev;  // FIXME: remove
        struct kobject *slave_dir;

        struct timer_rand_state *random;

        atomic_t sync_io;               /* RAID */
        struct work_struct async_notify;
#ifdef  CONFIG_BLK_DEV_INTEGRITY
        struct blk_integrity *integrity;
#endif
        int node_id;
};
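
A driver creates a gendisk with alloc_disk() and publishes it with add_disk(). Below is a minimal sketch against the 2.6.31 API; the "sbd" name, sbd_fops and sbd_queue are hypothetical placeholders (the queue is created in the request_queue sketch further down), not a real driver.

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>

static const struct block_device_operations sbd_fops = {
        .owner = THIS_MODULE,
};

static struct gendisk *sbd_disk;
static struct request_queue *sbd_queue;         /* created with blk_init_queue(), see below */
static int sbd_major;

static int __init sbd_init(void)
{
        sbd_major = register_blkdev(0, "sbd");  /* 0 = ask for a dynamic major */
        if (sbd_major <= 0)
                return -EIO;

        sbd_disk = alloc_disk(16);              /* room for 16 minors (partitions) */
        if (!sbd_disk) {
                unregister_blkdev(sbd_major, "sbd");
                return -ENOMEM;
        }

        sbd_disk->major = sbd_major;
        sbd_disk->first_minor = 0;
        sbd_disk->fops = &sbd_fops;
        sbd_disk->queue = sbd_queue;
        snprintf(sbd_disk->disk_name, DISK_NAME_LEN, "sbd0");
        set_capacity(sbd_disk, 2048);           /* capacity in 512-byte sectors */
        add_disk(sbd_disk);                     /* the disk becomes visible here */
        return 0;
}

On teardown the driver calls del_gendisk(), put_disk() and unregister_blkdev() in that order.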

2. request_queue
include/linux/blkdev.h
struct request_queue
{
        /*
         * Together with queue_head for cacheline sharing
         */
        struct list_head        queue_head;
        struct request          *last_merge;
        struct elevator_queue   *elevator;

        /*
         * the queue request freelist, one for reads and one for writes
         */
        struct request_list     rq;

        request_fn_proc         *request_fn;
        make_request_fn         *make_request_fn;
        prep_rq_fn              *prep_rq_fn;
        unplug_fn               *unplug_fn;
        merge_bvec_fn           *merge_bvec_fn;
        prepare_flush_fn        *prepare_flush_fn;
        softirq_done_fn         *softirq_done_fn;
        rq_timed_out_fn         *rq_timed_out_fn;
        dma_drain_needed_fn     *dma_drain_needed;
        lld_busy_fn             *lld_busy_fn;

        /*
         * Dispatch queue sorting
         */
        sector_t                end_sector;
        struct request          *boundary_rq;

        /*
         * Auto-unplugging state
         */
        struct timer_list       unplug_timer;
        int                     unplug_thresh;  /* After this many requests */
        unsigned long           unplug_delay;   /* After this many jiffies */
        struct work_struct      unplug_work;

        struct backing_dev_info backing_dev_info;

        /*
         * The queue owner gets to use this for whatever they like.
         * ll_rw_blk doesn't touch it.
         */
        void                    *queuedata;

        /*
         * queue needs bounce pages for pages above this limit
         */
        gfp_t                   bounce_gfp;

        /*
         * various queue flags, see QUEUE_* below
         */
        unsigned long           queue_flags;

        /*
         * protects queue structures from reentrancy. ->__queue_lock should
         * _never_ be used directly, it is queue private. always use
         * ->queue_lock.
         */
        spinlock_t              __queue_lock;
        spinlock_t              *queue_lock;

        /*
         * queue kobject
         */
        struct kobject kobj;

        /*
         * queue settings
         */
        unsigned long           nr_requests;    /* Max # of requests */
        unsigned int            nr_congestion_on;
        unsigned int            nr_congestion_off;
        unsigned int            nr_batching;
        unsigned int            rq_timeout;
        struct timer_list       timeout;
        struct list_head        timeout_list;

        struct queue_limits     limits;

        /*
         * sg stuff
         */
        unsigned int            sg_timeout;
        unsigned int            sg_reserved_size;
        int                     node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
        struct blk_trace        *blk_trace;
#endif
        /*
         * reserved for flush operations
         */
        unsigned int            ordered, next_ordered, ordseq;
        int                     orderr, ordcolor;
        struct request          pre_flush_rq, bar_rq, post_flush_rq;
        struct request          *orig_bar_rq;

        struct mutex            sysfs_lock;

#if defined(CONFIG_BLK_DEV_BSG)
        struct bsg_class_device bsg_dev;
#endif
};
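
A typical driver obtains its request_queue from blk_init_queue(), which attaches the default I/O scheduler and registers the request_fn that the block layer will call to drain the queue. A minimal sketch for 2.6.31; the sbd_* names are hypothetical and sbd_request_fn() is sketched after struct request below.

#include <linux/blkdev.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(sbd_lock);
static struct request_queue *sbd_queue;

static void sbd_request_fn(struct request_queue *q);   /* sketched below */

static int sbd_setup_queue(void)
{
        /* allocate the queue; ->queue_lock is set to &sbd_lock */
        sbd_queue = blk_init_queue(sbd_request_fn, &sbd_lock);
        if (!sbd_queue)
                return -ENOMEM;

        blk_queue_logical_block_size(sbd_queue, 512);   /* fills in ->limits */
        sbd_queue->queuedata = NULL;                    /* driver-private pointer */
        return 0;
}

/* on teardown: blk_cleanup_queue(sbd_queue); */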

3. request
include/linux/blkdev.h
/*
 * try to put the fields that are referenced together in the same cacheline.
 * if you modify this structure, be sure to check block/blk-core.c:rq_init()
 * as well!
 */
struct request {
        struct list_head queuelist;
        struct call_single_data csd; 
        int cpu; 

        struct request_queue *q;

        unsigned int cmd_flags;
        enum rq_cmd_type_bits cmd_type;
        unsigned long atomic_flags;

        /* the following two fields are internal, NEVER access directly */
        sector_t __sector;              /* sector cursor */
        unsigned int __data_len;        /* total data len */

        struct bio *bio;
        struct bio *biotail;

        struct hlist_node hash; /* merge hash */
        /*   
         * The rb_node is only used inside the io scheduler, requests
         * are pruned when moved to the dispatch queue. So let the
         * completion_data share space with the rb_node.
         */
        union {
                struct rb_node rb_node; /* sort/lookup */
                void *completion_data;
        };   

        /*   
         * two pointers are available for the IO schedulers, if they need
         * more they have to dynamically allocate it.
         */
        void *elevator_private;
        void *elevator_private2;

        struct gendisk *rq_disk;
        unsigned long start_time;

        /* Number of scatter-gather DMA addr+len pairs after
         * physical address coalescing is performed.
         */
        unsigned short nr_phys_segments;

        unsigned short ioprio;

        void *special;          /* opaque pointer available for LLD use */
        char *buffer;           /* kaddr of the current segment if available */

        int tag; 
        int errors;

        int ref_count;

        /*   
         * when request is used as a packet command carrier
         */
        unsigned short cmd_len;
        unsigned char __cmd[BLK_MAX_CDB];
        unsigned char *cmd;

        unsigned int extra_len; /* length of alignment and padding */
        unsigned int sense_len;
        unsigned int resid_len; /* residual count */
        void *sense;

        unsigned long deadline;
        struct list_head timeout_list;
        unsigned int timeout;
        int retries;

        /*   
         * completion callback.
         */
        rq_end_io_fn *end_io;
        void *end_io_data;

        /* for bidi */
        struct request *next_rq;
};
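
Requests reach the driver through the request_fn registered with blk_init_queue(). A minimal sketch of the 2.6.31-style fetch/complete loop; sbd_transfer() and the sbd_data RAM image are hypothetical helpers, not part of the kernel API.

#include <linux/blkdev.h>
#include <linux/string.h>

#define SBD_NSECTORS 2048

static char sbd_data[SBD_NSECTORS << 9];        /* hypothetical RAM backing store */

/* hypothetical data mover: copy nsect 512-byte sectors to/from sbd_data */
static void sbd_transfer(sector_t sector, char *buffer,
                         unsigned int nsect, int write)
{
        unsigned long offset = sector << 9;
        unsigned long nbytes = (unsigned long)nsect << 9;

        if (write)
                memcpy(sbd_data + offset, buffer, nbytes);
        else
                memcpy(buffer, sbd_data + offset, nbytes);
}

/* called by the block layer with q->queue_lock held */
static void sbd_request_fn(struct request_queue *q)
{
        struct request *req = blk_fetch_request(q);

        while (req != NULL) {
                if (!blk_fs_request(req)) {             /* only filesystem requests */
                        __blk_end_request_all(req, -EIO);
                        req = blk_fetch_request(q);
                        continue;
                }

                /* blk_rq_pos(): the __sector cursor shown above
                 * req->buffer : kernel address of the current segment
                 * rq_data_dir(): READ or WRITE                          */
                sbd_transfer(blk_rq_pos(req), req->buffer,
                             blk_rq_cur_sectors(req), rq_data_dir(req));

                /* complete the current segment; when the whole request
                 * is done, fetch the next one from the dispatch queue   */
                if (!__blk_end_request_cur(req, 0))
                        req = blk_fetch_request(q);
        }
}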

4. bio and bio_vec
include/linux/bio.h
/*
 * main unit of I/O for the block layer and lower layers (ie drivers and
 * stacking drivers)
 */
/* The contents of a bio descriptor keep changing during the block I/O operation. For instance,
if the block device driver cannot perform the whole data transfer with one scatter-gather DMA
operation, the bi_idx field is updated to keep track of the first segment in the bio that is
yet to be transferred. To iterate over the segments of a bio starting from the current segment
at index bi_idx, a device driver can execute the bio_for_each_segment macro.
   When the generic block layer starts a new I/O operation, it allocates a new bio structure by
invoking the bio_alloc() function. Usually, bios are allocated through the slab allocator, but the
kernel also keeps a small memory pool of bios to be used when memory is scarce (see the section
"Memory Pools" in Chapter 8). The kernel also keeps a memory pool for the bio_vec structures;
after all, it would not make sense to allocate a bio without being able to allocate the segment
descriptors to be included in the bio. Correspondingly, the bio_put() function decrements the
reference counter (bi_cnt) of a bio and, if the counter becomes zero, it releases the bio
structure and the related bio_vec structures.
 */
struct bio {
        sector_t                bi_sector;      /* device address in 512 byte
                                                   sectors */
        struct bio              *bi_next;       /* request queue link */
        struct block_device     *bi_bdev;
        unsigned long           bi_flags;       /* status, command, etc */
        unsigned long           bi_rw;          /* bottom bits READ/WRITE,
                                                 * top bits priority
                                                 */

        unsigned short          bi_vcnt;        /* how many bio_vec's */
        unsigned short          bi_idx;         /* current index into bvl_vec */

        /* Number of segments in this BIO after
         * physical address coalescing is performed.
         */
        unsigned int            bi_phys_segments;

        unsigned int            bi_size;        /* residual I/O count */

        /*
         * To keep track of the max segment size, we account for the
         * sizes of the first and last mergeable segments in this bio.
         */
        unsigned int            bi_seg_front_size;
        unsigned int            bi_seg_back_size;

        unsigned int            bi_max_vecs;    /* max bvl_vecs we can hold */

        unsigned int            bi_comp_cpu;    /* completion CPU */

        atomic_t                bi_cnt;         /* pin count */

        struct bio_vec          *bi_io_vec;     /* the actual vec list */

        bio_end_io_t            *bi_end_io;

        void                    *bi_private;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
        struct bio_integrity_payload *bi_integrity;  /* data integrity */
#endif

        bio_destructor_t        *bi_destructor; /* destructor */

        /*
         * We can inline a number of vecs at the end of the bio, to avoid
         * double allocations for a small number of bio_vecs. This member
         * MUST obviously be kept at the very end of the bio.
         */
        struct bio_vec          bi_inline_vecs[0];
};

/* 
 * was unsigned short, but we might as well be ready for > 64kB I/O pages
 */
struct bio_vec {
        struct page     *bv_page;
        unsigned int    bv_len;
        unsigned int    bv_offset;
};
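
The bio_for_each_segment macro mentioned above walks the bio_vec array from bi_idx to bi_vcnt - 1. A small sketch using the 2.6.31 API (two-argument kmap_atomic/kunmap_atomic) that XORs every byte described by a bio; the function name is made up for illustration.

#include <linux/bio.h>
#include <linux/highmem.h>

static unsigned char sbd_bio_checksum(struct bio *bio)
{
        struct bio_vec *bvec;
        unsigned char sum = 0;
        int i;

        /* iterate over the segments still to be transferred */
        bio_for_each_segment(bvec, bio, i) {
                unsigned char *kaddr = kmap_atomic(bvec->bv_page, KM_USER0);
                unsigned char *p = kaddr + bvec->bv_offset;
                unsigned int j;

                for (j = 0; j < bvec->bv_len; j++)      /* bv_len bytes at bv_offset */
                        sum ^= p[j];
                kunmap_atomic(kaddr, KM_USER0);
        }
        return sum;
}

When a driver finishes a bio it calls bio_endio(bio, 0); the submitter's bi_end_io callback then typically drops the last reference with bio_put(), which frees the bio and its bio_vec array as described in the comment above.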

// The role of the bio: a buffer between the block device and the filesystem.
// Next: study address_space and address_space_operations, and read the RAMDISK code.
// See ULK3, chapters 15 and 16.
