块设备在linux里的层次结构是很复杂的,一般都是文件系统和块设备打交道,当然了我们也可以自己直接操作块设备,看下面这个结构
/*
 * File operations table named def_blk_fops: every entry below is the
 * handler used when the corresponding operation is performed on a file
 * that uses this table.
 */
struct file_operations def_blk_fops = {
	.open		= blkdev_open,
	.release	= blkdev_close,
	.llseek		= block_llseek,
	.read		= generic_file_read,
	.write		= blkdev_file_write,
	.aio_read	= generic_file_aio_read,
	.aio_write	= blkdev_file_aio_write,
	.mmap		= generic_file_mmap,
	.fsync		= block_fsync,
	.unlocked_ioctl	= block_ioctl,
#ifdef CONFIG_COMPAT
	/* Only present when the kernel is built with CONFIG_COMPAT. */
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
	.readv		= generic_file_readv,
	.writev		= generic_file_write_nolock,
	.sendfile	= generic_file_sendfile,
};
所有对块设备的操作都通过这个函数集来进行,但这里的函数和块设备之间又隔了一层,那就是磁盘高速缓存,再由磁盘高速缓存来操作块设备。这里要弄清楚几个问题:打开一个块设备文件时是如何找到具体的块设备的?注册块设备时把相应的结构放到了哪里?它们之间是如何联系起来的?可以从两个方向来看:一个是从设备结点到打开块设备,这是从上到下;另一个是从最基本的一个Flash分区到它表现为一个块设备、可以让上层对它操作,这是从下到上。我先从下往上说。
一、块设备的注册过程。
NAND flash驱动注册
/*
 * Init entry point: hands s3c2410_nand_driver to driver_register() and
 * passes its result straight back to the caller.
 */
static int __init s3c2410_nand_init(void)
{
	int status;

	status = driver_register(&s3c2410_nand_driver);
	return status;
}
/*
 * Driver description for the "s3c2410-nand" device on the platform bus;
 * supplies the probe and remove callbacks.
 */
static struct device_driver s3c2410_nand_driver = {
	.name	= "s3c2410-nand",
	.bus	= &platform_bus_type,
	.probe	= s3c2410_nand_probe,
	.remove	= s3c2410_nand_remove,
};
驱动与平台设备匹配后会调用s3c2410_nand_probe,它再调用下面的s3c24xx_nand_probe完成实际的探测工作:
/*
 * Probe one S3C24xx NAND controller.
 *
 * Allocates and zeroes the per-controller state, attaches it to @dev as
 * driver data, enables the "nand" clock, reserves and maps the register
 * window described by the platform device's first resource, initialises
 * the hardware, then sets up every chip set: each one is initialised,
 * scanned with nand_scan() and, if the scan returned 0, registered via
 * s3c2410_nand_add_partition().
 *
 * dev:        the device being probed
 * is_s3c2440: stored verbatim in info->is_s3c2440
 *
 * Returns 0 on success.  On any setup failure s3c2410_nand_remove(dev) is
 * called to undo the partial work and the error code is returned.  Note
 * that a failing nand_scan() or a failing partition registration inside
 * the loop does NOT fail the probe.
 */
static int s3c24xx_nand_probe(struct device *dev, int is_s3c2440)
{
struct platform_device *pdev = to_platform_device(dev);
struct s3c2410_platform_nand *plat = to_nand_plat(dev);
struct s3c2410_nand_info *info;
struct s3c2410_nand_mtd *nmtd;
struct s3c2410_nand_set *sets;
struct resource *res;
int err = 0;
int size;
int nr_sets;
int setno;
pr_debug("s3c2410_nand_probe(%p)\n", dev);
/* allocate and zero the per-controller state (struct s3c2410_nand_info) */
info = kmalloc(sizeof(*info), GFP_KERNEL);
if (info == NULL) {
printk(KERN_ERR PFX "no memory for flash info\n");
err = -ENOMEM;
goto exit_error;
}
memzero(info, sizeof(*info));
/* store it as the device's private driver data */
dev_set_drvdata(dev, info);
spin_lock_init(&info->controller.lock);
init_waitqueue_head(&info->controller.wq);
/* get the clock source and enable it */
info->clk = clk_get(dev, "nand");
if (IS_ERR(info->clk)) {
/* NOTE(review): this message has no trailing "\n", unlike the others. */
printk(KERN_ERR PFX "failed to get clock");
err = -ENOENT;
goto exit_error;
}
clk_use(info->clk);
clk_enable(info->clk);
/* allocate and map the resource */
/* currently we assume we have the one resource */
res = pdev->resource;
size = res->end - res->start + 1;
info->area = request_mem_region(res->start, size, pdev->name);
if (info->area == NULL) {
printk(KERN_ERR PFX "cannot reserve register region\n");
err = -ENOENT;
goto exit_error;
}
info->device = dev;
info->platform = plat;
info->regs = ioremap(res->start, size);
info->is_s3c2440 = is_s3c2440;
if (info->regs == NULL) {
/*
 * NOTE(review): this is the ioremap() failure path, yet the message is
 * the same text as the request_mem_region() failure above.
 */
printk(KERN_ERR PFX "cannot reserve register region\n");
err = -EIO;
goto exit_error;
}
printk(KERN_INFO PFX "mapped registers at %p\n", info->regs);
/* initialise the hardware */
err = s3c2410_nand_inithw(info, dev);
if (err != 0)
goto exit_error;
/* Without platform data fall back to a single set with no description. */
sets = (plat != NULL) ? plat->sets : NULL;
nr_sets = (plat != NULL) ? plat->nr_sets : 1;
info->mtd_count = nr_sets;
/* allocate our information */
size = nr_sets * sizeof(*info->mtds);
info->mtds = kmalloc(size, GFP_KERNEL);
if (info->mtds == NULL) {
printk(KERN_ERR PFX "failed to allocate mtd storage\n");
err = -ENOMEM;
goto exit_error;
}
memzero(info->mtds, size);
/* initialise all possible chips */
nmtd = info->mtds;
printk("#-----------------------nr_sets = %d\n",nr_sets);
/* Article author's note: on the author's board there is only one NAND set. */
for (setno = 0; setno < nr_sets; setno++, nmtd++) {
pr_debug("initialising set %d (%p, info %p)\n",
setno, nmtd, info);
/* initialise this NAND set */
s3c2410_nand_init_chip(info, nmtd, sets);
/* scan the chip(s) of this set; one chip is assumed when no set is given */
nmtd->scan_res = nand_scan(&nmtd->mtd,
(sets) ? sets->nr_chips : 1);
if (nmtd->scan_res == 0) {
/* scan succeeded: register the flash (all of its partitions) with MTD */
s3c2410_nand_add_partition(info, nmtd, sets);
}
if (sets != NULL)
sets++;
}
pr_debug("initialised ok\n");
return 0;
exit_error:
/* Single cleanup path: let the remove handler release whatever was set up. */
s3c2410_nand_remove(dev);
/* Never report success from the error path. */
if (err == 0)
err = -EINVAL;
return err;
}
/*
 * Register the MTD device of one NAND set.
 *
 * When @set carries a non-empty partition table, every partition is
 * registered through add_mtd_partitions() (the article's author has five
 * of them); in every other case the device is registered as a whole with
 * add_mtd_device().
 *
 * Returns whatever the chosen registration function returns.
 */
static int s3c2410_nand_add_partition(struct s3c2410_nand_info *info,
				      struct s3c2410_nand_mtd *mtd,
				      struct s3c2410_nand_set *set)
{
	int has_table = (set != NULL) &&
			(set->nr_partitions > 0) &&
			(set->partitions != NULL);

	if (!has_table)
		return add_mtd_device(&mtd->mtd);

	printk("#----------------add_mtd_partitions(,,%d)\n",set->nr_partitions);
	return add_mtd_partitions(&mtd->mtd, set->partitions,
				  set->nr_partitions);
}
/*
 * Create one slave MTD object per entry of @parts on top of @master.
 *
 * For every partition a struct mtd_part is allocated, linked onto
 * mtd_partitions, filled with the master's geometry and with part_*
 * wrappers for each operation the master implements, positioned
 * (offset/size resolution plus sanity checks), and finally either handed
 * back through parts[i].mtdp or registered with add_mtd_device().
 *
 * master:  the underlying MTD device
 * parts:   partition table
 * nbparts: number of entries in @parts
 *
 * Returns 0 on success.  If an allocation fails, del_mtd_partitions(master)
 * is called and -ENOMEM is returned.  The return value of add_mtd_device()
 * is not checked.
 */
int add_mtd_partitions(struct mtd_info *master,
const struct mtd_partition *parts,
int nbparts)
{
struct mtd_part *slave;
/* end of the previously placed partition; used by APPEND / NXTBLK offsets */
u_int32_t cur_offset = 0;
int i;
printk (KERN_NOTICE "Creating %d MTD partitions on \"%s\":\n", nbparts, master->name);
/* handle each partition in turn */
for (i = 0; i < nbparts; i++)
{
/* allocate the partition structure */
slave = kmalloc (sizeof(*slave), GFP_KERNEL);
if (!slave) {
printk ("memory allocation error while creating partitions for \"%s\"\n",
master->name);
del_mtd_partitions(master);
return -ENOMEM;
}
memset(slave, 0, sizeof(*slave));
list_add(&slave->list, &mtd_partitions);
/* set up the MTD object for this partition */
slave->mtd.type = master->type;
/* partition flags are the master's flags minus the ones masked by the table */
slave->mtd.flags = master->flags & ~parts[i].mask_flags;
slave->mtd.size = parts[i].size;
slave->mtd.oobblock = master->oobblock;
slave->mtd.oobsize = master->oobsize;
slave->mtd.ecctype = master->ecctype;
slave->mtd.eccsize = master->eccsize;
slave->mtd.name = parts[i].name;
slave->mtd.bank_size = master->bank_size;
slave->mtd.owner = master->owner;
/* read, write and (below) erase are always provided ... */
slave->mtd.read = part_read;
slave->mtd.write = part_write;
/* ... every other operation only if the master implements it */
if(master->point && master->unpoint){
slave->mtd.point = part_point;
slave->mtd.unpoint = part_unpoint;
}
if (master->read_ecc)
slave->mtd.read_ecc = part_read_ecc;
if (master->write_ecc)
slave->mtd.write_ecc = part_write_ecc;
if (master->read_oob)
slave->mtd.read_oob = part_read_oob;
if (master->write_oob)
slave->mtd.write_oob = part_write_oob;
if(master->read_user_prot_reg)
slave->mtd.read_user_prot_reg = part_read_user_prot_reg;
if(master->read_fact_prot_reg)
slave->mtd.read_fact_prot_reg = part_read_fact_prot_reg;
if(master->write_user_prot_reg)
slave->mtd.write_user_prot_reg = part_write_user_prot_reg;
if(master->lock_user_prot_reg)
slave->mtd.lock_user_prot_reg = part_lock_user_prot_reg;
if(master->get_user_prot_info)
slave->mtd.get_user_prot_info = part_get_user_prot_info;
if(master->get_fact_prot_info)
slave->mtd.get_fact_prot_info = part_get_fact_prot_info;
if (master->sync)
slave->mtd.sync = part_sync;
/* suspend/resume hooks are installed on the first partition (i == 0) only */
if (!i && master->suspend && master->resume) {
slave->mtd.suspend = part_suspend;
slave->mtd.resume = part_resume;
}
if (master->writev)
slave->mtd.writev = part_writev;
if (master->readv)
slave->mtd.readv = part_readv;
if (master->writev_ecc)
slave->mtd.writev_ecc = part_writev_ecc;
if (master->readv_ecc)
slave->mtd.readv_ecc = part_readv_ecc;
if (master->lock)
slave->mtd.lock = part_lock;
if (master->unlock)
slave->mtd.unlock = part_unlock;
if (master->block_isbad)
slave->mtd.block_isbad = part_block_isbad;
if (master->block_markbad)
slave->mtd.block_markbad = part_block_markbad;
slave->mtd.erase = part_erase;
slave->master = master;
slave->offset = parts[i].offset;
slave->index = i;
/* MTDPART_OFS_APPEND: start right after the previous partition */
if (slave->offset == MTDPART_OFS_APPEND)
slave->offset = cur_offset;
/* MTDPART_OFS_NXTBLK: round cur_offset up, using erasesize-1 as a mask */
if (slave->offset == MTDPART_OFS_NXTBLK) {
u_int32_t emask = master->erasesize-1;
slave->offset = (cur_offset + emask) & ~emask;
if (slave->offset != cur_offset) {
printk(KERN_NOTICE "Moving partition %d: "
"0x%08x -> 0x%08x\n", i,
cur_offset, slave->offset);
}
}
/* MTDPART_SIZ_FULL: take everything up to the end of the master */
if (slave->mtd.size == MTDPART_SIZ_FULL)
slave->mtd.size = master->size - slave->offset;
cur_offset = slave->offset + slave->mtd.size;
printk (KERN_NOTICE "0x%08x-0x%08x : \"%s\"\n", slave->offset,
slave->offset + slave->mtd.size, slave->mtd.name);
/* let's do some sanity checks */
if (slave->offset >= master->size) {
/* let's register it anyway to preserve ordering */
slave->offset = 0;
slave->mtd.size = 0;
printk ("mtd: partition \"%s\" is out of reach -- disabled\n",
parts[i].name);
}
if (slave->offset + slave->mtd.size > master->size) {
slave->mtd.size = master->size - slave->offset;
printk ("mtd: partition \"%s\" extends beyond the end of device \"%s\" -- size truncated to %#x\n",
parts[i].name, master->name, slave->mtd.size);
}
if (master->numeraseregions>1) {
/* Deal with variable erase size stuff */
/* NOTE(review): this inner 'i' shadows the partition index for the rest of this block. */
int i;
struct mtd_erase_region_info *regions = master->eraseregions;
/* Find the first erase regions which is part of this partition. */
for (i=0; i < master->numeraseregions && slave->offset >= regions[i].offset; i++)
;
/* Step back one region, then keep the largest erasesize of all regions the partition overlaps. */
for (i--; i < master->numeraseregions && slave->offset + slave->mtd.size > regions[i].offset; i++) {
if (slave->mtd.erasesize < regions[i].erasesize) {
slave->mtd.erasesize = regions[i].erasesize;
}
}
} else {
/* Single erase size */
slave->mtd.erasesize = master->erasesize;
}
/* A writable partition must start and end on an erase-block boundary; otherwise it is made read-only. */
if ((slave->mtd.flags & MTD_WRITEABLE) &&
(slave->offset % slave->mtd.erasesize)) {
/* Doesn't start on a boundary of major erase size */
/* FIXME: Let it be writable if it is on a boundary of _minor_ erase size though */
slave->mtd.flags &= ~MTD_WRITEABLE;
printk ("mtd: partition \"%s\" doesn't start on an erase block boundary -- force read-only\n",
parts[i].name);
}
if ((slave->mtd.flags & MTD_WRITEABLE) &&
(slave->mtd.size % slave->mtd.erasesize)) {
slave->mtd.flags &= ~MTD_WRITEABLE;
printk ("mtd: partition \"%s\" doesn't end on an erase block -- force read-only\n",
parts[i].name);
}
/* copy oobinfo from master */
memcpy(&slave->mtd.oobinfo, &master->oobinfo, sizeof(slave->mtd.oobinfo));
if(parts[i].mtdp)
{ /* store the object pointer (caller may or may not register it */
*parts[i].mtdp = &slave->mtd;
slave->registered = 0;
}
else
{
/* register our partition */
/* each partition is registered as a raw MTD device of its own (return value ignored) */
add_mtd_device(&slave->mtd);
slave->registered = 1;
}
}
return 0;
}
/*
 * Install @mtd in the first free slot of mtd_table and tell every
 * registered MTD notifier about it.
 *
 * Returns 0 when a slot was found, 1 when the table is full.
 */
int add_mtd_device(struct mtd_info *mtd)
{
	struct list_head *pos;
	int slot;

	down(&mtd_table_mutex);

	/* Locate the first unused entry of the raw MTD device table. */
	for (slot = 0; slot < MAX_MTD_DEVICES; slot++) {
		if (!mtd_table[slot])
			break;
	}

	if (slot >= MAX_MTD_DEVICES) {
		/* Table is full. */
		up(&mtd_table_mutex);
		return 1;
	}

	mtd_table[slot] = mtd;
	mtd->index = slot;
	mtd->usecount = 0;

	DEBUG(0, "mtd: Giving out device %d to %s\n",slot, mtd->name);

	/*
	 * No refcount is taken on the module that owns each notifier:
	 * mtd_table_mutex is held while they are called.
	 */
	list_for_each(pos, &mtd_notifiers) {
		struct mtd_notifier *listener =
			list_entry(pos, struct mtd_notifier, list);

		listener->add(mtd);
	}

	up(&mtd_table_mutex);

	/*
	 * Our caller is still holding us, so this module cannot be in the
	 * middle of removal; an unconditional get is sufficient.
	 */
	__module_get(THIS_MODULE);
	return 0;
}
上面通知了,那通知给谁了呢?
/*
 * mtdblock initialisation: registers the mtdblock_tr translation layer
 * and returns the result of that registration.
 */
static int __init init_mtdblock(void)
{
	int status;

	status = register_mtd_blktrans(&mtdblock_tr);
	return status;
}
/*
 * Register an MTD block translation layer.
 *
 * Hooks blktrans_notifier into the MTD notifier chain on first use,
 * allocates the layer's private core data, registers its block major,
 * creates its request queue and worker thread, links it onto
 * blktrans_majors and finally offers it every MTD device that is already
 * present in mtd_table.
 *
 * tr: the translation layer to register
 *
 * Returns 0 on success, -ENOMEM if the private data or the request queue
 * cannot be allocated, or the error returned by register_blkdev() /
 * kernel_thread().  Everything set up so far is undone on failure.
 */
int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
{
	int ret, i;

	/*
	 * Register the notifier if/when the first device type is registered,
	 * so that the result does not depend on link/init ordering.
	 */
	if (!blktrans_notifier.list.next)
		register_mtd_user(&blktrans_notifier);

	tr->blkcore_priv = kmalloc(sizeof(*tr->blkcore_priv), GFP_KERNEL);
	if (!tr->blkcore_priv)
		return -ENOMEM;

	memset(tr->blkcore_priv, 0, sizeof(*tr->blkcore_priv));

	down(&mtd_table_mutex);

	/* Claim the block major for this layer. */
	ret = register_blkdev(tr->major, tr->name);
	if (ret) {
		printk(KERN_WARNING "Unable to register %s block device on major %d: %d\n",
		       tr->name, tr->major, ret);
		kfree(tr->blkcore_priv);
		up(&mtd_table_mutex);
		return ret;
	}

	spin_lock_init(&tr->blkcore_priv->queue_lock);
	init_completion(&tr->blkcore_priv->thread_dead);
	init_waitqueue_head(&tr->blkcore_priv->thread_wq);

	tr->blkcore_priv->rq = blk_init_queue(mtd_blktrans_request, &tr->blkcore_priv->queue_lock);
	if (!tr->blkcore_priv->rq) {
		unregister_blkdev(tr->major, tr->name);
		kfree(tr->blkcore_priv);
		up(&mtd_table_mutex);
		return -ENOMEM;
	}

	/* Lets the request handler get back to the layer from the queue. */
	tr->blkcore_priv->rq->queuedata = tr;

	ret = kernel_thread(mtd_blktrans_thread, tr, CLONE_KERNEL);
	if (ret < 0) {
		blk_cleanup_queue(tr->blkcore_priv->rq);
		unregister_blkdev(tr->major, tr->name);
		kfree(tr->blkcore_priv);
		up(&mtd_table_mutex);
		return ret;
	}

	devfs_mk_dir(tr->name);

	INIT_LIST_HEAD(&tr->devs);
	/* From now on MTD add/remove notifications reach this layer. */
	list_add(&tr->list, &blktrans_majors);

	/* Offer the layer every MTD device that was registered before it. */
	for (i = 0; i < MAX_MTD_DEVICES; i++) {
		if (mtd_table[i] && mtd_table[i]->type != MTD_ABSENT)
			tr->add_mtd(tr, mtd_table[i]);
	}

	up(&mtd_table_mutex);

	return 0;
}
/* MTD notifier whose add/remove callbacks are the blktrans_notify_* functions. */
static struct mtd_notifier blktrans_notifier = {
	.add	= blktrans_notify_add,
	.remove	= blktrans_notify_remove,
};
/*
 * "Device added" callback: forwards the new MTD device to the add_mtd()
 * method of every translation layer on blktrans_majors.  Devices of type
 * MTD_ABSENT are ignored.
 */
static void blktrans_notify_add(struct mtd_info *mtd)
{
	struct list_head *pos;

	if (mtd->type != MTD_ABSENT) {
		list_for_each(pos, &blktrans_majors) {
			struct mtd_blktrans_ops *layer =
				list_entry(pos, struct mtd_blktrans_ops, list);

			layer->add_mtd(layer, mtd);
		}
	}
}
/*
 * The "mtdblock" translation layer: block major 31, no partition bits
 * (so one minor per device), and the mtdblock_* callbacks.
 */
static struct mtd_blktrans_ops mtdblock_tr = {
	.name		= "mtdblock",
	.major		= 31,
	.part_bits	= 0,
	.open		= mtdblock_open,
	.flush		= mtdblock_flush,
	.release	= mtdblock_release,
	.readsect	= mtdblock_readsect,
	.writesect	= mtdblock_writesect,
	.add_mtd	= mtdblock_add_mtd,
	.remove_dev	= mtdblock_remove_dev,
	.owner		= THIS_MODULE,
};
/*
 * add_mtd() method of the mtdblock layer; the MTD "device added"
 * notification ends up here.
 *
 * Allocates a translation-layer device for @mtd, fills it in (512-byte
 * sectors, size in sectors, read-only unless the MTD is writeable) and
 * registers it with add_mtd_blktrans_dev().
 *
 * tr:  the translation layer the new device belongs to
 * mtd: the MTD device to expose as a block device
 *
 * Nothing is returned; allocation or registration failures are silent.
 * If registration fails the device is freed here, since nothing else
 * holds a reference to it at that point.
 */
static void mtdblock_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
{
	struct mtd_blktrans_dev *dev = kmalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return;

	memset(dev, 0, sizeof(*dev));

	/* Fill in the translation-layer device. */
	dev->mtd = mtd;
	dev->devnum = mtd->index;
	dev->blksize = 512;
	dev->size = mtd->size >> 9;	/* bytes -> 512-byte sectors */
	dev->tr = tr;

	if (!(mtd->flags & MTD_WRITEABLE))
		dev->readonly = 1;

	/* Register it; on failure the device was never published, so free it. */
	if (add_mtd_blktrans_dev(dev))
		kfree(dev);
}
/*
 * Register one translation-layer device with its layer and with the
 * block layer.
 *
 * The device is inserted into tr->devs, which is kept sorted by devnum.
 * A devnum of -1 asks for the first free number.  A gendisk is then
 * allocated, named "<tr->name><n>", sized from new->size/new->blksize,
 * attached to the layer's shared request queue and published with
 * add_disk().
 *
 * The caller must hold mtd_table_mutex; being able to take it here is
 * treated as a bug.
 *
 * new: the device to register; new->tr must point at its layer
 *
 * Returns 0 on success, -EBUSY if the requested devnum is taken or too
 * large, -ENOMEM if no gendisk can be allocated (the device is unlinked
 * again in that case).
 */
int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
{
	struct mtd_blktrans_ops *tr = new->tr;
	struct list_head *this;
	int last_devnum = -1;
	struct gendisk *gd;

	if (!down_trylock(&mtd_table_mutex)) {
		up(&mtd_table_mutex);
		BUG();
	}

	/*
	 * Initialise the per-device semaphore up front so that it is valid
	 * on every path, including the two sorted-insert paths that jump
	 * straight to "added", and before the device becomes reachable
	 * through tr->devs.
	 */
	init_MUTEX(&new->sem);

	/* Walk the sorted list to validate or choose the device number. */
	list_for_each(this, &tr->devs) {
		struct mtd_blktrans_dev *d = list_entry(this, struct mtd_blktrans_dev, list);

		if (new->devnum == -1) {
			/* Use first free number */
			if (d->devnum != last_devnum+1) {
				/* Found a free devnum. Plug it in here */
				new->devnum = last_devnum+1;
				list_add_tail(&new->list, &d->list);
				goto added;
			}
		} else if (d->devnum == new->devnum) {
			/* Required number taken */
			return -EBUSY;
		} else if (d->devnum > new->devnum) {
			/* Required number was free */
			list_add_tail(&new->list, &d->list);
			goto added;
		}
		last_devnum = d->devnum;
	}

	/* Reached the end of the list: the device goes at the tail. */
	if (new->devnum == -1)
		new->devnum = last_devnum+1;

	if ((new->devnum << tr->part_bits) > 256) {
		return -EBUSY;
	}

	/* Link the device onto its translation layer. */
	list_add_tail(&new->list, &tr->devs);
 added:
	/* A layer without a writesect method can only offer read-only devices. */
	if (!tr->writesect)
		new->readonly = 1;

	/* Allocate the generic disk with 1 << part_bits minors. */
	gd = alloc_disk(1 << tr->part_bits);
	if (!gd) {
		list_del(&new->list);
		return -ENOMEM;
	}
	gd->major = tr->major;
	gd->first_minor = (new->devnum) << tr->part_bits;
	/* Block-device operations shared by all translation-layer disks. */
	gd->fops = &mtd_blktrans_ops;

	/* Suffix is a letter when the disk is partitionable, a digit otherwise. */
	snprintf(gd->disk_name, sizeof(gd->disk_name),
		 "%s%c", tr->name, (tr->part_bits?'a':'0') + new->devnum);
	snprintf(gd->devfs_name, sizeof(gd->devfs_name),
		 "%s/%c", tr->name, (tr->part_bits?'a':'0') + new->devnum);

	/* 2.5 has capacity in units of 512 bytes while still
	   having BLOCK_SIZE_BITS set to 10. Just to keep us amused. */
	set_capacity(gd, (new->size * new->blksize) >> 9);

	/* Cross-link the gendisk and the translation-layer device. */
	gd->private_data = new;
	new->blkcore_priv = gd;
	gd->queue = tr->blkcore_priv->rq;

	if (new->readonly)
		set_disk_ro(gd, 1);

	add_disk(gd);

	return 0;
}
void add_disk(struct gendisk *disk)
{
disk->flags |= GENHD_FL_UP;
//注册到块设备位图,这里是关键,当打开块设备时就从这个位图里找,也是这里是上与上的连接点。
blk_register_region(MKDEV(disk->major, disk->first_minor),disk->minors, NULL, exact_match, exact_lock, disk);
register_disk(disk);
blk_register_queue(disk);
}
/*
 * Thin wrapper: records the given device-number range in bdev_map by
 * passing every argument through to kobj_map().
 */
void blk_register_region(dev_t dev, unsigned long range,
			 struct module *module,
			 struct kobject *(*probe)(dev_t, int *, void *),
			 int (*lock)(dev_t, void *),
			 void *data)
{
	kobj_map(bdev_map, dev, range, module, probe, lock, data);
}
void register_disk(struct gendisk *disk)
{
struct block_device *bdev;
char *s;
int err;
strlcpy(disk->kobj.name,disk->disk_name,KOBJ_NAME_LEN);
/* e some of these buggers have / in name... */
s = strchr(disk->kobj.name, '/');
if (s)
*s = '!';
//增加到sysfs
if ((err = kobject_add(&disk->kobj)))
return;
disk_sysfs_symlinks(disk);
kobject_hotplug(&disk->kobj, KOBJ_ADD);
/* No minors to use for partitions */
printk("#1--------------register_disk disk->minors=%d\n",disk->minors);
if (disk->minors == 1) {
if (disk->devfs_name[0] != '\0')
//在dev下创建块设备。到这里所有创建工作总算完事了。
devfs_add_disk(disk);
return;
}
printk("#2--------------register_disk disk->minors=%d\n",disk->minors);
/* always add handle for the whole disk */
devfs_add_partitioned(disk);
/* No such device (e.g., media were just removed) */
if (!get_capacity(disk))
return;
bdev = bdget_disk(disk, 0);
if (!bdev)
return;
bdev->bd_invalidated = 1;
if (blkdev_get(bdev, FMODE_READ, 0) < 0)
return;
blkdev_put(bdev);
}
到此为止在dev文件系统下已经创建了结点,现在可以打开设备了。
二、块设备的打开。
先做一下准备工作。
/*
 * One-time initialisation of the generic block-device layer, run as a
 * subsys initcall.  Always returns 0.
 */
static int __init genhd_device_init(void)
{
	/* Create the block-device map. */
	bdev_map = kobj_map_init(base_probe, &block_subsys_sem);

	/* Block-layer initialisation. */
	blk_dev_init();

	printk("###genhd_device_init %s\n",block_subsys.kset.kobj.name);

	/* Register the block subsystem (the article notes this creates /sys/block). */
	subsystem_register(&block_subsys);

	return 0;
}

subsys_initcall(genhd_device_init);
/*
 * Create the VFS slab caches and run the VFS sub-initialisers.  According
 * to the article this is called from start_kernel().
 *
 * mempages: page count used to size the hash tables, after subtracting
 *           the reserve computed below
 */
void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long reserve;

	/*
	 * Base hash sizes on available memory, with a reserve equal to
	 * 150% of current kernel size
	 */
	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
	mempages -= reserve;

	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
					 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					 NULL, NULL);

	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
					SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					filp_ctor, filp_dtor);

	dcache_init(mempages);
	inode_init(mempages);
	files_init(mempages);
	mnt_init(mempages);

	/* These two take no sizing argument. */
	bdev_cache_init();
	chrdev_init();
}
/*
 * Pairs a block_device with an inode in a single object.  Objects of this
 * type come from the "bdev_cache" slab cache.
 */
struct bdev_inode {
/* the block device structure */
struct block_device bdev;
/* the inode that represents it in the block-device pseudo filesystem */
struct inode vfs_inode;
};
/* Filesystem type "bdev": the block-device pseudo filesystem. */
static struct file_system_type bd_type = {
	.name		= "bdev",
	.get_sb		= bd_get_sb,
	.kill_sb	= kill_anon_super,
};
/* Mount of the block-device pseudo filesystem. */
static struct vfsmount *bd_mnt;
/* Superblock of the block-device pseudo filesystem. */
struct super_block *blockdev_superblock;
/*
 * Set up the block-device pseudo filesystem: create the slab cache for
 * struct bdev_inode, register the "bdev" filesystem type and mount it
 * kernel-internally.  Any failure panics.
 */
void __init bdev_cache_init(void)
{
	bdev_cachep = kmem_cache_create("bdev_cache",
					sizeof(struct bdev_inode), 0,
					SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_PANIC,
					init_once, NULL);

	/* Register the block-device filesystem type. */
	if (register_filesystem(&bd_type))
		panic("Cannot register bdev pseudo-fs");

	/* Create a vfsmount for it; there is no real mount point. */
	bd_mnt = kern_mount(&bd_type);
	if (IS_ERR(bd_mnt))
		panic("Cannot create bdev pseudo-fs");

	blockdev_superblock = bd_mnt->mnt_sb;	/* For writeback */
}
为块设备还注册了一个文件系统,可以看出在这个文件系统里分配一个I节点时就同时分配了struct block_device(两者同处于struct bdev_inode中)。
相关结构。
/*
 * Description of one block device (a whole disk or a partition), looked
 * up by device number.
 */
struct block_device
{
/* device number of this block device */
dev_t bd_dev; /* not a kdev_t - it's a search key */
/* inode of this device in the block-device pseudo filesystem */
struct inode * bd_inode; /* will die */
/* number of current openers (counted in do_open) */
int bd_openers;
struct semaphore bd_sem; /* open/close mutex */
struct semaphore bd_mount_sem; /* mount mutex */
/* list of the inodes that refer to this block device */
struct list_head bd_inodes;
void * bd_holder;
int bd_holders;
/* for a partition: the whole-disk device; for a whole disk: itself */
struct block_device * bd_contains;
/* block size */
unsigned bd_block_size;
/* partition description, set when this device is a partition */
struct hd_struct * bd_part;
/* number of times partitions within this device have been opened. */
unsigned bd_part_count;
int bd_invalidated;
/* the gendisk this device belongs to */
struct gendisk * bd_disk;
/* link in the list of all block devices */
struct list_head bd_list;
struct backing_dev_info *bd_inode_backing_dev_info;
/*
* Private data. You must have bd_claim'ed the block_device
* to use this. NOTE: bd_claim allows an owner to claim
* the same device multiple times, the owner must take special
* care to not mess up bd_private for that case.
*/
unsigned long bd_private;
};
/*
 * Generic disk: the block layer's description of one disk, covering a
 * range of minor numbers under a single major.
 */
struct gendisk {
/* major device number of the block device */
int major; /* major number of driver */
/* first minor number */
int first_minor;
/* size of the minor-number range */
int minors; /* maximum number of minors, =1 for
* disks that can't be partitioned (e.g. mtdblock0 in this article). */
char disk_name[32]; /* name of major driver */
struct hd_struct **part; /* [indexed by minor] */
/* block-device operations of this disk */
struct block_device_operations *fops;
/* request queue */
struct request_queue *queue;
/* driver private data; for MTD disks a struct mtd_blktrans_dev */
void *private_data;
sector_t capacity;
int flags;
/* devfs name, e.g. "mtdblock/0" */
char devfs_name[64]; /* devfs crap */
int number; /* more of the same */
struct device *driverfs_dev;
struct kobject kobj;
struct timer_rand_state *random;
int policy;
atomic_t sync_io; /* RAID */
unsigned long stamp, stamp_idle;
int in_flight;
/* statistics: a pointer on SMP builds, embedded otherwise */
#ifdef CONFIG_SMP
struct disk_stats *dkstats;
#else
struct disk_stats dkstats;
#endif
};
这两个结构是对块设备不同层次的描述。
开始blkdev_open。
static int blkdev_open(struct inode * inode, struct file * filp)
{
struct block_device *bdev;
int res;
/*
* Preserve backwards compatibility and allow large file access
* even if userspace doesn't ask for it explicitly. Some mkfs
* binary needs it. We might want to drop this workaround
* during an unstable branch.
*/
printk("#-------------------------blkdev_open\n");
filp->f_flags |= O_LARGEFILE;
//查找或分配struct block_devece.
bdev = bd_acquire(inode);
res = do_open(bdev, filp);
if (res)
return res;
if (!(filp->f_flags & O_EXCL) )
return 0;
if (!(res = bd_claim(bdev, filp)))
return 0;
blkdev_put(bdev);
return res;
}
static struct block_device *bd_acquire(struct inode *inode)
{
struct block_device *bdev;
spin_lock(&bdev_lock);
bdev = inode->i_bdev;
//是否已经打开过
if (bdev && igrab(bdev->bd_inode)) {
spin_unlock(&bdev_lock);
return bdev;
}
spin_unlock(&bdev_lock);
//根据设备号到块设备伪文件系统中去找或分配block_device结构。
bdev = bdget(inode->i_rdev);
if (bdev) {
spin_lock(&bdev_lock);
if (inode->i_bdev)
__bd_forget(inode);
inode->i_bdev = bdev;
inode->i_mapping = bdev->bd_inode->i_mapping;
list_add(&inode->i_devices, &bdev->bd_inodes);
spin_unlock(&bdev_lock);
}
return bdev;
}
/*
 * Look up, or create, the block_device for device number @dev.
 *
 * The lookup goes through the inode cache of the block-device pseudo
 * filesystem; the block_device is taken from the bdev_inode that holds
 * the inode.  A freshly created inode (I_NEW) gets both halves
 * initialised and is put on all_bdevs before it is unlocked.
 *
 * Returns the block_device, or NULL if no inode could be obtained.
 */
struct block_device *bdget(dev_t dev)
{
	struct inode *node;
	struct block_device *blk;

	node = iget5_locked(bd_mnt->mnt_sb, hash(dev),
			    bdev_test, bdev_set, &dev);
	if (node == NULL)
		return NULL;

	/* Convert the inode into its block_device. */
	blk = &BDEV_I(node)->bdev;

	/* An inode that already existed needs no setup. */
	if (!(node->i_state & I_NEW))
		return blk;

	/* Newly allocated: initialise the block_device ... */
	blk->bd_contains = NULL;
	blk->bd_inode = node;
	blk->bd_block_size = (1 << node->i_blkbits);
	blk->bd_part_count = 0;
	blk->bd_invalidated = 0;

	/* ... and the inode. */
	node->i_mode = S_IFBLK;
	node->i_rdev = dev;
	node->i_bdev = blk;
	node->i_data.a_ops = &def_blk_aops;
	mapping_set_gfp_mask(&node->i_data, GFP_USER);
	node->i_data.backing_dev_info = &default_backing_dev_info;

	spin_lock(&bdev_lock);
	list_add(&blk->bd_list, &all_bdevs);
	spin_unlock(&bdev_lock);

	unlock_new_inode(node);
	return blk;
}
/*
 * Find the inode matching (@test, @data) in the hash chain selected by
 * @sb and @hashval; if there is none, have get_new_inode() create one.
 *
 * Returns whatever ifind() or get_new_inode() returns.
 */
struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
			   int (*test)(struct inode *, void *),
			   int (*set)(struct inode *, void *), void *data)
{
	struct hlist_head *bucket = inode_hashtable + hash(sb, hashval);
	struct inode *hit;

	/* Look for an existing inode first. */
	hit = ifind(sb, bucket, test, data, 1);

	/*
	 * Otherwise allocate a new one (here: from the block-device
	 * filesystem).  get_new_inode() will do the right thing, re-trying
	 * the search in case it had to block at any point.
	 */
	return hit ? hit : get_new_inode(sb, bucket, test, set, data);
}
/*
 * Open a block device on behalf of @file.
 *
 * Looks up the gendisk for bdev->bd_dev.  For the first opener the
 * device is bound to the disk: a whole disk (part == 0) has the driver's
 * open method called, its size and backing_dev_info set and, if it was
 * invalidated, its partitions rescanned; a partition first opens the
 * whole disk and then takes its size from the partition entry.  Later
 * openers only repeat the driver open (whole disk) or bump the
 * whole-disk partition count (partition).
 *
 * bdev: the block device to open
 * file: the file doing the open; its f_mapping is pointed at the
 *       device's mapping
 *
 * Returns 0 on success (bd_openers is incremented).  On failure an error
 * code is returned and bdput(bdev) is called: -ENXIO when there is no
 * disk or the partition is unusable, -ENOMEM when the whole-disk device
 * cannot be obtained, or the error from the driver's open method /
 * blkdev_get().
 */
static int do_open(struct block_device *bdev, struct file *file)
{
struct module *owner = NULL;
struct gendisk *disk;
int ret = -ENXIO;
int part;
file->f_mapping = bdev->bd_inode->i_mapping;
lock_kernel();
/* find the struct gendisk for this device number; 'part' receives the partition index */
disk = get_gendisk(bdev->bd_dev, &part);
if (!disk) {
unlock_kernel();
bdput(bdev);
return ret;
}
owner = disk->fops->owner;
down(&bdev->bd_sem);
/* first opener: bind the device to its disk */
if (!bdev->bd_openers)
{
bdev->bd_disk = disk;
bdev->bd_contains = bdev;
/* part == 0: the whole disk is being opened */
if (!part)
{
struct backing_dev_info *bdi;
if (disk->fops->open)
{
ret = disk->fops->open(bdev->bd_inode, file);
if (ret)
goto out_first;
}
if (!bdev->bd_openers) {
/* capacity is in 512-byte sectors; convert to bytes */
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
bdi = blk_get_backing_dev_info(bdev);
if (bdi == NULL)
bdi = &default_backing_dev_info;
bdev->bd_inode->i_data.backing_dev_info = bdi;
}
if (bdev->bd_invalidated)
rescan_partitions(disk, bdev);
}
else
{
/* a partition: open the whole disk first, then attach to it */
struct hd_struct *p;
struct block_device *whole;
whole = bdget_disk(disk, 0);
ret = -ENOMEM;
if (!whole)
goto out_first;
ret = blkdev_get(whole, file->f_mode, file->f_flags);
if (ret)
goto out_first;
bdev->bd_contains = whole;
down(&whole->bd_sem);
whole->bd_part_count++;
/* disk->part[] is indexed from 0, partition numbers start at 1 */
p = disk->part[part - 1];
bdev->bd_inode->i_data.backing_dev_info =
whole->bd_inode->i_data.backing_dev_info;
/* refuse a disk that is not up, or a missing / empty partition */
if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) {
whole->bd_part_count--;
up(&whole->bd_sem);
ret = -ENXIO;
goto out_first;
}
kobject_get(&p->kobj);
bdev->bd_part = p;
bd_set_size(bdev, (loff_t) p->nr_sects << 9);
up(&whole->bd_sem);
}
}
else
{
/*
 * Already open: drop the disk and module references again.
 * NOTE(review): presumably these were taken by get_gendisk() above -- confirm.
 */
put_disk(disk);
module_put(owner);
if (bdev->bd_contains == bdev)
{
/* whole disk: still call the driver's open and honour a pending rescan */
if (bdev->bd_disk->fops->open)
{
ret = bdev->bd_disk->fops->open(bdev->bd_inode, file);
if (ret)
goto out;
}
if (bdev->bd_invalidated)
rescan_partitions(bdev->bd_disk, bdev);
}
else
{
/* partition: account for one more open partition on the whole disk */
down(&bdev->bd_contains->bd_sem);
bdev->bd_contains->bd_part_count++;
up(&bdev->bd_contains->bd_sem);
}
}
bdev->bd_openers++;
up(&bdev->bd_sem);
unlock_kernel();
return 0;
/* failure while binding as first opener: undo the binding and drop the disk/module references */
out_first:
bdev->bd_disk = NULL;
bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
if (bdev != bdev->bd_contains)
blkdev_put(bdev->bd_contains);
bdev->bd_contains = NULL;
put_disk(disk);
module_put(owner);
/* common exit for failures: release the locks and drop the bdev reference */
out:
up(&bdev->bd_sem);
unlock_kernel();
if (ret)
bdput(bdev);
return ret;
}
/*
 * Where the two halves meet: look @dev up in bdev_map, the map that
 * add_disk() fills through blk_register_region().
 *
 * dev:  device number to look up
 * part: passed through to kobj_lookup()
 *
 * Returns the gendisk that contains the kobject found, or NULL if the
 * lookup finds nothing.
 */
struct gendisk *get_gendisk(dev_t dev, int *part)
{
	struct kobject *found = kobj_lookup(bdev_map, dev, part);

	if (found == NULL)
		return NULL;

	return to_disk(found);
}