在具体开讲今天的内容之前,由于块设备比字符设备稍显复杂,需要先把一些情况交代清楚:
1. 一个块设备使用一个struct gendisk表示(设备号与该结构存储在bdev_map当中,关于该块设备的整体操作都在这个结构里面表示),里面有对应的分区信息
-
/*
 * struct gendisk - descriptor for one whole block device (disk).
 *
 * Carries the device numbers (major / first_minor / minors), the disk name,
 * the partition table pointer plus the embedded partition 0, the
 * block_device_operations table, the request queue and driver private data.
 */
struct gendisk {
-
/* major, first_minor and minors are input parameters only,
-
* don't use directly. Use disk_devt() and disk_max_parts().
-
*/
-
int major; /* major number of driver */
-
int first_minor;
-
int minors; /* maximum number of minors, =1 for
-
* disks that can't be partitioned. */
-
-
char disk_name[DISK_NAME_LEN]; /* name of major driver */
-
-
unsigned short events; /* supported events */
-
unsigned short event_flags; /* flags related to event processing */
-
-
/* Array of pointers to partitions indexed by partno.
-
* Protected with matching bdev lock but stat and other
-
* non-critical accesses use RCU. Always access through
-
* helpers.
-
*/
-
struct disk_part_tbl __rcu *part_tbl;
-
struct hd_struct part0;// the whole block device occupies partition 0
-
-
const struct block_device_operations *fops;
-
struct request_queue *queue;
-
void *private_data;
-
-
int flags;
-
struct rw_semaphore lookup_sem;
-
struct kobject *slave_dir;
-
-
struct timer_rand_state *random;
-
atomic_t sync_io; /* RAID */
-
struct disk_events *ev;
-
#ifdef CONFIG_BLK_DEV_INTEGRITY
-
struct kobject integrity_kobj;
-
#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
int node_id;
-
struct badblocks *bb;
-
struct lockdep_map lockdep_map;
-
};
2. 一个块设备的每一个被打开分区,会对应一个struct block_device结构:
-
/*
 * struct block_device - per-device-number handle for a block device or one
 * of its partitions.
 *
 * bd_dev is the lookup key; bd_disk / bd_part / bd_contains link the
 * instance back to its gendisk, its partition and the whole-disk instance;
 * bd_inode points at the inode that backs this block_device.
 */
struct block_device {
-
dev_t bd_dev; /* not a kdev_t - it's a search key */
-
int bd_openers;
-
struct inode * bd_inode; /* will die */
-
struct super_block * bd_super;
-
struct mutex bd_mutex; /* open/close mutex */
-
void * bd_claiming;
-
void * bd_holder;
-
int bd_holders;
-
bool bd_write_holder;
-
#ifdef CONFIG_SYSFS
-
struct list_head bd_holder_disks;
-
#endif
-
struct block_device * bd_contains;// if this block_device represents a partition, this points to the block_device instance of the whole disk (gendisk)
-
unsigned bd_block_size;
-
u8 bd_partno;
-
struct hd_struct * bd_part;// the corresponding partition inside the gendisk
-
/* number of times partitions within this device have been opened. */
-
unsigned bd_part_count;
-
int bd_invalidated;
-
struct gendisk * bd_disk;// the whole gendisk
-
struct request_queue * bd_queue;
-
struct backing_dev_info *bd_bdi;
-
struct list_head bd_list;
-
/*
-
* Private data. You must have bd_claim'ed the block_device
-
* to use this. NOTE: bd_claim allows an owner to claim
-
* the same device multiple times, the owner must take special
-
* care to not mess up bd_private for that case.
-
*/
-
unsigned long bd_private;
-
-
/* The counter of freeze processes */
-
int bd_fsfreeze_count;
-
/* Mutex for freeze */
-
struct mutex bd_fsfreeze_mutex;
-
} __randomize_layout;
3. 每一个分区(包含整个块设备)都包含在一个伪文件系统bdev当中,这个文件系统对用户来说是透明的,这个伪文件系统的inode结构如下:
-
/*
 * struct bdev_inode - a block_device and its VFS inode stored side by side.
 *
 * Because both are members of the same object, the block_device can be
 * reached from the inode with
 * container_of(inode, struct bdev_inode, vfs_inode).
 */
struct bdev_inode {
-
struct block_device bdev;
-
struct inode vfs_inode;
-
};
关于这个伪文件系统的bdev_inode,之前纠结了好久该怎么解释,直到这篇文章发出来之后,才想出个大概(如果错了,后续会填坑):既然是个文件系统,就需要inode对应的所有操作,这些没必要再实现一次,直接用现成的struct inode 即可;块设备相关的操作则通过struct block_device 完成。这两者是一一对应的关系,封装在一起更便于管理(即struct bdev_inode)
好了,基本交代清楚,接下来就按照打开字符设备的顺序开讲打开块设备的顺序
1. 在init_special_inode函数当中,如果是字符设备,默认i_fop设置的是def_chr_fops;如果是块设备,就是def_blk_fops了,代码如下:
-
/*
 * init_special_inode - set up an inode for a special (non-regular) file.
 * @inode: inode to initialise
 * @mode:  file mode; stored in i_mode and used to select the file type
 * @rdev:  device number; stored in i_rdev for char and block devices only
 *
 * Selects the default file_operations by type: def_chr_fops for character
 * devices, def_blk_fops for block devices, pipefifo_fops for FIFOs.
 * Sockets keep whatever i_fop is already set; any other mode only emits a
 * debug message.
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
-
{
-
inode->i_mode = mode;
-
if (S_ISCHR(mode)) {
-
inode->i_fop = &def_chr_fops;
-
inode->i_rdev = rdev;
-
} else if (S_ISBLK(mode)) {
-
/* block devices: opening the file will go through def_blk_fops */
inode->i_fop = &def_blk_fops;
-
inode->i_rdev = rdev;
-
} else if (S_ISFIFO(mode))
-
inode->i_fop = &pipefifo_fops;
-
else if (S_ISSOCK(mode))
-
; /* leave it no_open_fops */
-
else
-
printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
-
" inode %s:%lu\n", mode, inode->i_sb->s_id,
-
inode->i_ino);
-
}
这样,在open的时候,调用的就是blkdev_open了:
-
/*
 * def_blk_fops - default file_operations for block device inodes.
 *
 * Installed as i_fop by init_special_inode() when S_ISBLK(mode) holds, so
 * opening such a file is dispatched to blkdev_open().
 */
const struct file_operations def_blk_fops = {
-
.open = blkdev_open,
-
.release = blkdev_close,
-
.llseek = block_llseek,
-
.read_iter = blkdev_read_iter,
-
.write_iter = blkdev_write_iter,
-
.iopoll = blkdev_iopoll,
-
.mmap = generic_file_mmap,
-
.fsync = blkdev_fsync,
-
.unlocked_ioctl = block_ioctl,
-
#ifdef CONFIG_COMPAT
-
.compat_ioctl = compat_blkdev_ioctl,
-
#endif
-
.splice_read = generic_file_splice_read,
-
.splice_write = iter_file_splice_write,
-
.fallocate = blkdev_fallocate,
-
};
-
/*
 * blkdev_open - .open handler of def_blk_fops.
 * @inode: inode of the device node being opened
 * @filp:  file being opened; its f_flags, f_mode, f_mapping and f_wb_err
 *         are updated here
 *
 * Forces O_LARGEFILE and FMODE_NOWAIT, mirrors O_NDELAY / O_EXCL / access
 * mode 3 into f_mode bits, obtains the block_device through bd_acquire(),
 * redirects the file's mapping to the block device inode's mapping and
 * finally calls blkdev_get().
 *
 * Returns -ENOMEM when bd_acquire() yields NULL, otherwise the return value
 * of blkdev_get().
 */
static int blkdev_open(struct inode * inode, struct file * filp)
-
{
-
struct block_device *bdev;
-
-
/*
-
* Preserve backwards compatibility and allow large file access
-
* even if userspace doesn't ask for it explicitly. Some mkfs
-
* binary needs it. We might want to drop this workaround
-
* during an unstable branch.
-
*/
-
filp->f_flags |= O_LARGEFILE;
-
-
filp->f_mode |= FMODE_NOWAIT;
-
-
if (filp->f_flags & O_NDELAY)
-
filp->f_mode |= FMODE_NDELAY;
-
if (filp->f_flags & O_EXCL)
-
filp->f_mode |= FMODE_EXCL;
-
/* an access-mode value of 3 is mapped to FMODE_WRITE_IOCTL */
if ((filp->f_flags & O_ACCMODE) == 3)
-
filp->f_mode |= FMODE_WRITE_IOCTL;
-
-
bdev = bd_acquire(inode);// the inode here is the inode of /dev/xxx
-
if (bdev == NULL)
-
return -ENOMEM;
-
-
/* file I/O goes through the mapping of the block device's own inode */
filp->f_mapping = bdev->bd_inode->i_mapping;
-
filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
-
-
return blkdev_get(bdev, filp->f_mode, filp);
-
}
在这里我们看到,blkdev_open 先是通过bd_acquire,根据当前/dev/下的相关inode来找到对应的block_device结构。具体实现是根据设备号找到对应的bdev_inode里面的vfs_inode(这一步的调用链是bd_acquire->bdget,bdget会涉及到inode缓存,即在一个已装载的文件系统中,根据超级块和inode号[这里是设备号]hash存储了对应inode的相关信息),再通过container_of由vfs_inode找到所在的bdev_inode,进而取得其中的block_device。找到实例之后,就是像字符设备一样设置inode相关的各种字段了,只不过这里设置的是inode->i_bdev而非inode->i_cdev而已,其余的file_operations就类似了
-
/*
 * bdget - obtain the block_device that belongs to device number @dev.
 * @dev: device number used as the lookup key
 *
 * Fetches the inode from blockdev_superblock via iget5_locked(), keyed by
 * hash(dev) and matched with bdev_test / bdev_set. When the returned inode
 * is flagged I_NEW, the block_device and inode fields are initialised, the
 * block_device is added to all_bdevs under bdev_lock, and the inode is
 * unlocked with unlock_new_inode().
 *
 * Returns the block_device, or NULL when iget5_locked() returns no inode.
 */
struct block_device *bdget(dev_t dev)// device number of the inode under /dev/xxx
-
{
-
struct block_device *bdev;
-
struct inode *inode;
-
// the inode found here is the vfs_inode embedded in bdev_inode
-
inode = iget5_locked(blockdev_superblock, hash(dev),
-
bdev_test, bdev_set, &dev);
-
-
if (!inode)
-
return NULL;
-
// container_of(inode, struct bdev_inode, vfs_inode)
-
bdev = &BDEV_I(inode)->bdev;
-
-
/* inode is flagged I_NEW: fill in both the block_device and the inode */
if (inode->i_state & I_NEW) {
-
bdev->bd_contains = NULL;
-
bdev->bd_super = NULL;
-
bdev->bd_inode = inode;
-
bdev->bd_block_size = i_blocksize(inode);
-
bdev->bd_part_count = 0;
-
bdev->bd_invalidated = 0;
-
inode->i_mode = S_IFBLK;
-
inode->i_rdev = dev;
-
inode->i_bdev = bdev;
-
inode->i_data.a_ops = &def_blk_aops;
-
mapping_set_gfp_mask(&inode->i_data, GFP_USER);
-
/* all_bdevs is modified only while holding bdev_lock */
spin_lock(&bdev_lock);
-
list_add(&bdev->bd_list, &all_bdevs);
-
spin_unlock(&bdev_lock);
-
unlock_new_inode(inode);
-
}
-
return bdev;
-
}
阅读(1904) | 评论(0) | 转发(0) |