Chinaunix首页 | 论坛 | 博客
  • 博客访问: 74593
  • 博文数量: 22
  • 博客积分: 1475
  • 博客等级: 上尉
  • 技术积分: 260
  • 用 户 组: 普通用户
  • 注册时间: 2010-03-04 23:39
文章分类

全部博文(22)

文章存档

2013年(1)

2011年(6)

2010年(15)

我的朋友

分类: LINUX

2010-05-17 16:50:53

 块设备在Linux里的层次结构是很复杂的。一般都是文件系统和块设备打交道,当然我们也可以自己直接操作块设备。先看下面这个结构:
/*
 * File-operations table for block device files. Each member below routes
 * one file operation (open, release, seek, read/write, async I/O, mmap,
 * fsync, ioctl, vectored I/O, sendfile) to the handler named on that line.
 * The compat ioctl entry is only present when CONFIG_COMPAT is defined.
 */
struct file_operations def_blk_fops = {
 .open  = blkdev_open,
 .release = blkdev_close,
 .llseek  = block_llseek,
 .read  = generic_file_read,
 .write  = blkdev_file_write,
   .aio_read = generic_file_aio_read,
   .aio_write = blkdev_file_aio_write,
 .mmap  = generic_file_mmap,
 .fsync  = block_fsync,
 .unlocked_ioctl = block_ioctl,
#ifdef CONFIG_COMPAT
 .compat_ioctl = compat_blkdev_ioctl,
#endif
 .readv  = generic_file_readv,
 .writev  = generic_file_write_nolock,
 .sendfile = generic_file_sendfile,
};
所有对块设备的操作都通过这个函数集来进行,但这里的函数和块设备之间又隔了一层,那就是磁盘高速缓存,再由磁盘高速缓存来操作块设备。那么,当打开一个块设备文件时是如何找到具体的块设备的?注册块设备时相应的结构被放到了哪里?它们又是如何联系起来的?这可以从两个方向来看:一个是从设备结点到打开块设备,这是从上到下;另一个是从最基本的一个Flash分区到它表现为一个块设备、可以让上层对它操作,这是从下到上。我先从下往上说。
一、块设备的注册过程。
NAND flash驱动注册
/*
 * Init-time entry point (marked __init): registers s3c2410_nand_driver with
 * the driver core and returns driver_register()'s result unchanged.
 */
static int __init s3c2410_nand_init(void)
{
 return driver_register(&s3c2410_nand_driver);
}
/*
 * Driver description for the S3C2410 NAND controller: it is named
 * "s3c2410-nand", sits on the platform bus, and supplies the probe/remove
 * callbacks listed below.
 */
static struct device_driver s3c2410_nand_driver = {
 .name  = "s3c2410-nand",
 .bus  = &platform_bus_type,
 .probe  = s3c2410_nand_probe,
 .remove  = s3c2410_nand_remove,
};
驱动与设备匹配后会调用s3c2410_nand_probe,而它最终调用的是下面的s3c24xx_nand_probe:
/*
 * Probe routine for the S3C24xx NAND controller.
 *
 * In order, it: allocates and zeroes the per-controller state and stores it
 * as the device's driver data; gets and enables the "nand" clock; reserves
 * and ioremaps the register window described by the platform device's
 * resource; initialises the hardware; allocates one s3c2410_nand_mtd per
 * chip set; and for every set initialises the chip, runs nand_scan() and,
 * if the scan returned 0, registers the set's partitions.
 *
 * @dev:        device being probed
 * @is_s3c2440: stored verbatim in info->is_s3c2440 (presumably selects the
 *              S3C2440 variant of the controller -- confirm in the helpers)
 *
 * Returns 0 on success. On failure it calls s3c2410_nand_remove(dev) and
 * returns the recorded error code (-EINVAL if none was recorded).
 *
 * NOTE(review): a failed nand_scan() for a set and the return value of
 * s3c2410_nand_add_partition() do not fail the probe.
 * NOTE(review): the first error path runs before dev_set_drvdata(), so
 * s3c2410_nand_remove() must cope with missing driver data -- confirm.
 */
static int s3c24xx_nand_probe(struct device *dev, int is_s3c2440)
{
 struct platform_device *pdev = to_platform_device(dev);
 struct s3c2410_platform_nand *plat = to_nand_plat(dev);
 struct s3c2410_nand_info *info;
 struct s3c2410_nand_mtd *nmtd;
 struct s3c2410_nand_set *sets;
 struct resource *res;
 int err = 0;
 int size;
 int nr_sets;
 int setno;
 pr_debug("s3c2410_nand_probe(%p)\n", dev);
 // allocate the per-controller state (struct s3c2410_nand_info)
 info = kmalloc(sizeof(*info), GFP_KERNEL);
 if (info == NULL) {
  printk(KERN_ERR PFX "no memory for flash info\n");
  err = -ENOMEM;
  goto exit_error;
 }
 memzero(info, sizeof(*info));
 // store it as the private driver data of dev
 dev_set_drvdata(dev, info);
 spin_lock_init(&info->controller.lock);
 init_waitqueue_head(&info->controller.wq);
 // get the clock source and enable it
 info->clk = clk_get(dev, "nand");
 if (IS_ERR(info->clk)) {
  printk(KERN_ERR PFX "failed to get clock");
  err = -ENOENT;
  goto exit_error;
 }
 clk_use(info->clk);
 clk_enable(info->clk);
 /* allocate and map the resource */
 /* currently we assume we have the one resource */
 res  = pdev->resource;
 size = res->end - res->start + 1;
 info->area = request_mem_region(res->start, size, pdev->name);
 if (info->area == NULL) {
  printk(KERN_ERR PFX "cannot reserve register region\n");
  err = -ENOENT;
  goto exit_error;
 }
 info->device     = dev;
 info->platform   = plat;
 info->regs       = ioremap(res->start, size);
 info->is_s3c2440 = is_s3c2440;
 /* this branch handles ioremap() failure, although the message text says "reserve" */
 if (info->regs == NULL) {
  printk(KERN_ERR PFX "cannot reserve register region\n");
  err = -EIO;
  goto exit_error;
 }  
 printk(KERN_INFO PFX "mapped registers at %p\n", info->regs);
 // initialise the hardware
 err = s3c2410_nand_inithw(info, dev);
 if (err != 0)
  goto exit_error;
 /* without platform data, fall back to a single set with no description */
 sets = (plat != NULL) ? plat->sets : NULL;
 nr_sets = (plat != NULL) ? plat->nr_sets : 1;
 info->mtd_count = nr_sets;
 /* allocate our information */
 size = nr_sets * sizeof(*info->mtds);
 info->mtds = kmalloc(size, GFP_KERNEL);
 if (info->mtds == NULL) {
  printk(KERN_ERR PFX "failed to allocate mtd storage\n");
  err = -ENOMEM;
  goto exit_error;
 }
 memzero(info->mtds, size);
 /* initialise all possible chips */
 nmtd = info->mtds;
 printk("#-----------------------nr_sets = %d\n",nr_sets);
 // author's note: there is only one NAND chip on this board
 for (setno = 0; setno < nr_sets; setno++, nmtd++) {
  pr_debug("initialising set %d (%p, info %p)\n",
    setno, nmtd, info);
  // initialise this NAND chip set
  s3c2410_nand_init_chip(info, nmtd, sets);
  // scan the NAND behind this set; the result is kept in nmtd->scan_res
  nmtd->scan_res = nand_scan(&nmtd->mtd,
        (sets) ? sets->nr_chips : 1);
  if (nmtd->scan_res == 0) {
   // register the partitions on this flash (they become block devices
   // through the MTD notifier chain)
   s3c2410_nand_add_partition(info, nmtd, sets);
  }
  if (sets != NULL)
   sets++;
 }
 
 pr_debug("initialised ok\n");
 return 0;
 exit_error:
 s3c2410_nand_remove(dev);
 if (err == 0)
  err = -EINVAL;
 return err;
}
/*
 * Register the MTD device(s) for one NAND chip set.
 *
 * When the set carries a non-empty partition table, every partition is
 * registered through add_mtd_partitions(); in all other cases (no set, or a
 * set without partitions) the whole chip is registered as a single MTD
 * device. The result of whichever registration call was made is returned.
 * The info argument is not used here.
 */
static int s3c2410_nand_add_partition(struct s3c2410_nand_info *info,
          struct s3c2410_nand_mtd *mtd,
          struct s3c2410_nand_set *set)
{
 int has_table;

 has_table = (set != NULL) &&
      (set->nr_partitions > 0) &&
      (set->partitions != NULL);

 if (!has_table) {
  /* No usable partition table: expose the whole device. */
  return add_mtd_device(&mtd->mtd);
 }

 /* Author's note: all partitions of the flash (five on the author's
  * board) are registered as MTD devices here. */
 printk("#----------------add_mtd_partitions(,,%d)\n",set->nr_partitions);
 return add_mtd_partitions(&mtd->mtd, set->partitions,
      set->nr_partitions);
}
/*
 * Create one "slave" MTD object per entry of a partition table, layered on
 * top of a master MTD device.
 *
 * For each partition it allocates a struct mtd_part, links it into
 * mtd_partitions, copies geometry/ECC attributes from the master, installs
 * the part_* wrappers for every operation the master implements, resolves
 * the partition's offset and size (including the APPEND / NXTBLK / FULL
 * placeholders), clamps it to the master's size, picks its erase size and
 * drops MTD_WRITEABLE when the partition is not erase-block aligned.
 * Finally the partition is either handed back through parts[i].mtdp or
 * registered with add_mtd_device().
 *
 * @master:  underlying MTD device
 * @parts:   partition table
 * @nbparts: number of entries in @parts
 *
 * Returns 0 on success, or -ENOMEM if a partition structure cannot be
 * allocated (after calling del_mtd_partitions(master)).
 *
 * NOTE(review): the return value of add_mtd_device() is ignored.
 */
int add_mtd_partitions(struct mtd_info *master,
         const struct mtd_partition *parts,
         int nbparts)
{
 struct mtd_part *slave;
 u_int32_t cur_offset = 0;
 int i;
 printk (KERN_NOTICE "Creating %d MTD partitions on \"%s\":\n", nbparts, master->name);
 // handle each partition in turn
 for (i = 0; i < nbparts; i++)
 {
  /* allocate the partition structure */
  slave = kmalloc (sizeof(*slave), GFP_KERNEL);
  if (!slave) {
   printk ("memory allocation error while creating partitions for \"%s\"\n",
    master->name);
   del_mtd_partitions(master);
   return -ENOMEM;
  }
  memset(slave, 0, sizeof(*slave));
  list_add(&slave->list, &mtd_partitions);
  /* set up the MTD object for this partition */
  slave->mtd.type = master->type;
  slave->mtd.flags = master->flags & ~parts[i].mask_flags;
  slave->mtd.size = parts[i].size;
  slave->mtd.oobblock = master->oobblock;
  slave->mtd.oobsize = master->oobsize;
  slave->mtd.ecctype = master->ecctype;
  slave->mtd.eccsize = master->eccsize;
  slave->mtd.name = parts[i].name;
  slave->mtd.bank_size = master->bank_size;
  slave->mtd.owner = master->owner;
  slave->mtd.read = part_read;
  slave->mtd.write = part_write;
  /* optional operations are only wired up if the master provides them */
  if(master->point && master->unpoint){
   slave->mtd.point = part_point;
   slave->mtd.unpoint = part_unpoint;
  }
  
  if (master->read_ecc)
   slave->mtd.read_ecc = part_read_ecc;
  if (master->write_ecc)
   slave->mtd.write_ecc = part_write_ecc;
  if (master->read_oob)
   slave->mtd.read_oob = part_read_oob;
  if (master->write_oob)
   slave->mtd.write_oob = part_write_oob;
  if(master->read_user_prot_reg)
   slave->mtd.read_user_prot_reg = part_read_user_prot_reg;
  if(master->read_fact_prot_reg)
   slave->mtd.read_fact_prot_reg = part_read_fact_prot_reg;
  if(master->write_user_prot_reg)
   slave->mtd.write_user_prot_reg = part_write_user_prot_reg;
  if(master->lock_user_prot_reg)
   slave->mtd.lock_user_prot_reg = part_lock_user_prot_reg;
  if(master->get_user_prot_info)
   slave->mtd.get_user_prot_info = part_get_user_prot_info;
  if(master->get_fact_prot_info)
   slave->mtd.get_fact_prot_info = part_get_fact_prot_info;
  if (master->sync)
   slave->mtd.sync = part_sync;
  /* suspend/resume are installed on the first partition (i == 0) only */
  if (!i && master->suspend && master->resume) {
    slave->mtd.suspend = part_suspend;
    slave->mtd.resume = part_resume;
  }
  if (master->writev)
   slave->mtd.writev = part_writev;
  if (master->readv)
   slave->mtd.readv = part_readv;
  if (master->writev_ecc)
   slave->mtd.writev_ecc = part_writev_ecc;
  if (master->readv_ecc)
   slave->mtd.readv_ecc = part_readv_ecc;
  if (master->lock)
   slave->mtd.lock = part_lock;
  if (master->unlock)
   slave->mtd.unlock = part_unlock;
  if (master->block_isbad)
   slave->mtd.block_isbad = part_block_isbad;
  if (master->block_markbad)
   slave->mtd.block_markbad = part_block_markbad;
  slave->mtd.erase = part_erase;
  slave->master = master;
  slave->offset = parts[i].offset;
  slave->index = i;
  /* APPEND: start right where the previous partition ended */
  if (slave->offset == MTDPART_OFS_APPEND)
   slave->offset = cur_offset;
  /* NXTBLK: start at cur_offset rounded up to the master's erase size */
  if (slave->offset == MTDPART_OFS_NXTBLK) {
   u_int32_t emask = master->erasesize-1;
   slave->offset = (cur_offset + emask) & ~emask;
   if (slave->offset != cur_offset) {
    printk(KERN_NOTICE "Moving partition %d: "
           "0x%08x -> 0x%08x\n", i,
           cur_offset, slave->offset);
   }
  }
  /* SIZ_FULL: extend to the end of the master device */
  if (slave->mtd.size == MTDPART_SIZ_FULL)
   slave->mtd.size = master->size - slave->offset;
  cur_offset = slave->offset + slave->mtd.size;
 
  printk (KERN_NOTICE "0x%08x-0x%08x : \"%s\"\n", slave->offset,
   slave->offset + slave->mtd.size, slave->mtd.name);
  /* let's do some sanity checks */
  if (slave->offset >= master->size) {
    /* let's register it anyway to preserve ordering */
   slave->offset = 0;
   slave->mtd.size = 0;
   printk ("mtd: partition \"%s\" is out of reach -- disabled\n",
    parts[i].name);
  }
  if (slave->offset + slave->mtd.size > master->size) {
   slave->mtd.size = master->size - slave->offset;
   printk ("mtd: partition \"%s\" extends beyond the end of device \"%s\" -- size truncated to %#x\n",
    parts[i].name, master->name, slave->mtd.size);
  }
  if (master->numeraseregions>1) {
   /* Deal with variable erase size stuff */
   /* NOTE: this 'i' shadows the partition index for the rest of this
      block only; parts[i] below uses the outer index again. */
   int i;
   struct mtd_erase_region_info *regions = master->eraseregions;
   
   /* Find the first erase regions which is part of this partition. */
   for (i=0; i < master->numeraseregions && slave->offset >= regions[i].offset; i++)
    ;
   /* Step back one region, then take the largest erase size among the
      regions that the partition overlaps. */
   for (i--; i < master->numeraseregions && slave->offset + slave->mtd.size > regions[i].offset; i++) {
    if (slave->mtd.erasesize < regions[i].erasesize) {
     slave->mtd.erasesize = regions[i].erasesize;
    }
   }
  } else {
   /* Single erase size */
   slave->mtd.erasesize = master->erasesize;
  }
  if ((slave->mtd.flags & MTD_WRITEABLE) &&
      (slave->offset % slave->mtd.erasesize)) {
   /* Doesn't start on a boundary of major erase size */
   /* FIXME: Let it be writable if it is on a boundary of _minor_ erase size though */
   slave->mtd.flags &= ~MTD_WRITEABLE;
   printk ("mtd: partition \"%s\" doesn't start on an erase block boundary -- force read-only\n",
    parts[i].name);
  }
  if ((slave->mtd.flags & MTD_WRITEABLE) &&
      (slave->mtd.size % slave->mtd.erasesize)) {
   slave->mtd.flags &= ~MTD_WRITEABLE;
   printk ("mtd: partition \"%s\" doesn't end on an erase block -- force read-only\n",
    parts[i].name);
  }
  /* copy oobinfo from master */
  memcpy(&slave->mtd.oobinfo, &master->oobinfo, sizeof(slave->mtd.oobinfo));
  if(parts[i].mtdp)
  { /* store the object pointer (caller may or may not register it */
   *parts[i].mtdp = &slave->mtd;
   slave->registered = 0;
  }
  else
  {
   /* register our partition */
   // the partition is registered as a raw MTD device; the block device
   // is created later via the MTD notifier
   add_mtd_device(&slave->mtd);
   slave->registered = 1;
  }
 }
 return 0;
}
/*
 * Register a raw MTD device.
 *
 * Under mtd_table_mutex, the device is placed in the first unused entry of
 * mtd_table, its index and use count are initialised, and the add() hook of
 * every registered MTD notifier is invoked. After the mutex is released a
 * reference on this module is taken.
 *
 * Returns 0 on success, or 1 when mtd_table has no free entry.
 */
int add_mtd_device(struct mtd_info *mtd)
{
 struct list_head *pos;
 int slot;

 down(&mtd_table_mutex);

 /* Look for the first free entry in the raw MTD device table. */
 for (slot = 0; slot < MAX_MTD_DEVICES; slot++) {
  if (!mtd_table[slot])
   break;
 }

 if (slot >= MAX_MTD_DEVICES) {
  /* Every entry is taken. */
  up(&mtd_table_mutex);
  return 1;
 }

 mtd_table[slot] = mtd;
 mtd->index = slot;
 mtd->usecount = 0;
 DEBUG(0, "mtd: Giving out device %d to %s\n",slot, mtd->name);

 /* No need to get a refcount on the module containing the notifier,
    since we hold the mtd_table_mutex. */
 list_for_each(pos, &mtd_notifiers) {
  struct mtd_notifier *user = list_entry(pos, struct mtd_notifier, list);
  user->add(mtd);
 }

 up(&mtd_table_mutex);

 /* We know we aren't being removed, because our caller is still
    holding us here, so an unconditional module reference is fine. */
 __module_get(THIS_MODULE);
 return 0;
}
上面通知了,那通知给谁了呢?
/*
 * Init-time entry point of the mtdblock driver (marked __init): registers
 * the mtdblock translation layer and returns register_mtd_blktrans()'s
 * result unchanged.
 */
static int __init init_mtdblock(void)
{
 return register_mtd_blktrans(&mtdblock_tr);
}
int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
{
 int ret, i;
 /* Register the notifier if/when the first device type is
    registered, to prevent the link/init ordering from fucking
    us over. */
 //注册到通知链。
 if (!blktrans_notifier.list.next)
  register_mtd_user(&blktrans_notifier);
 tr->blkcore_priv = kmalloc(sizeof(*tr->blkcore_priv), GFP_KERNEL);
 if (!tr->blkcore_priv)
  return -ENOMEM;
 memset(tr->blkcore_priv, 0, sizeof(*tr->blkcore_priv));
 down(&mtd_table_mutex);
 //在块设备哈希表里注册。
 ret = register_blkdev(tr->major, tr->name);
 if (ret) {
  printk(KERN_WARNING "Unable to register %s block device on major %d: %d\n",
         tr->name, tr->major, ret);
  kfree(tr->blkcore_priv);
  up(&mtd_table_mutex);
  return ret;
 }
 
 spin_lock_init(&tr->blkcore_priv->queue_lock);
 init_completion(&tr->blkcore_priv->thread_dead);
 init_waitqueue_head(&tr->blkcore_priv->thread_wq);
 tr->blkcore_priv->rq = blk_init_queue(mtd_blktrans_request, &tr->blkcore_priv->queue_lock);
 if (!tr->blkcore_priv->rq) {
  unregister_blkdev(tr->major, tr->name);
  kfree(tr->blkcore_priv);
  up(&mtd_table_mutex);
  return -ENOMEM;
 }
 tr->blkcore_priv->rq->queuedata = tr;
 ret = kernel_thread(mtd_blktrans_thread, tr, CLONE_KERNEL);
 if (ret < 0) {
  blk_cleanup_queue(tr->blkcore_priv->rq);
  unregister_blkdev(tr->major, tr->name);
  kfree(tr->blkcore_priv);
  up(&mtd_table_mutex);
  return ret;
 }
 devfs_mk_dir(tr->name);
 INIT_LIST_HEAD(&tr->devs);
 //注册到一个链表里,MTD设备的增加或去除会被通知。
 list_add(&tr->list, &blktrans_majors);
 for (i=0; i  if (mtd_table[i] && mtd_table[i]->type != MTD_ABSENT)
   tr->add_mtd(tr, mtd_table[i]);
 }
 up(&mtd_table_mutex);
 return 0;
}
/*
 * MTD notifier of the block translation core: its add/remove hooks are the
 * callbacks invoked when an MTD device is added or removed.
 */
static struct mtd_notifier blktrans_notifier = {
 .add = blktrans_notify_add,
 .remove = blktrans_notify_remove,
};
/*
 * Notifier hook for a newly added MTD device: hands the device to the
 * add_mtd() method of every translation layer on blktrans_majors.
 * Devices of type MTD_ABSENT are ignored.
 */
static void blktrans_notify_add(struct mtd_info *mtd)
{
 struct list_head *pos;
 struct mtd_blktrans_ops *ops;

 if (mtd->type != MTD_ABSENT) {
  /* Let each registered translation layer look at the device. */
  list_for_each(pos, &blktrans_majors) {
   ops = list_entry(pos, struct mtd_blktrans_ops, list);
   ops->add_mtd(ops, mtd);
  }
 }
}
/*
 * Translation-layer description of the "mtdblock" driver: block major 31,
 * part_bits 0 (so each disk is allocated 1 << 0 = one minor), plus the
 * open/flush/release, sector read/write and MTD add/remove callbacks.
 */
static struct mtd_blktrans_ops mtdblock_tr = {
 .name  = "mtdblock",
 .major  = 31,
 .part_bits = 0,
 .open  = mtdblock_open,
 .flush  = mtdblock_flush,
 .release = mtdblock_release,
 .readsect = mtdblock_readsect,
 .writesect = mtdblock_writesect,
 .add_mtd = mtdblock_add_mtd,
 .remove_dev = mtdblock_remove_dev,
 .owner  = THIS_MODULE,
};
/*
 * add_mtd callback of the mtdblock translation layer; this is where the
 * "MTD device added" notification ends up.
 *
 * Allocates a translation-layer device for the MTD device, fills it in
 * (same device number as the MTD index, 512-byte blocks, size expressed in
 * 512-byte units, read-only unless the MTD device is writeable) and
 * registers it with add_mtd_blktrans_dev().
 *
 * @tr:  translation layer the new device belongs to
 * @mtd: MTD device that was just added
 *
 * Allocation or registration failures are silent; the function returns
 * nothing.
 */
static void mtdblock_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
{
 struct mtd_blktrans_dev *dev = kmalloc(sizeof(*dev), GFP_KERNEL);
 if (!dev)
  return;
 memset(dev, 0, sizeof(*dev));
 /* Initialise the translation-layer block device. */
 dev->mtd   = mtd;
 dev->devnum  = mtd->index;
 dev->blksize  = 512;
 dev->size   = mtd->size >> 9;
 dev->tr   = tr;
 if (!(mtd->flags & MTD_WRITEABLE))
  dev->readonly = 1;
 /* Register it. On failure add_mtd_blktrans_dev() leaves dev off the
    translator's device list, so nothing else references it and it must be
    freed here to avoid a leak. */
 if (add_mtd_blktrans_dev(dev))
  kfree(dev);
}
//MTD翻译层块设备注册
int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
{
 struct mtd_blktrans_ops *tr = new->tr;
 struct list_head *this;
 int last_devnum = -1;
 struct gendisk *gd;
 if (!down_trylock(&mtd_table_mutex)) {
  up(&mtd_table_mutex);
  BUG();
 }
 //检查分区是否合法
 list_for_each(this, &tr->devs) {
  struct mtd_blktrans_dev *d = list_entry(this, struct mtd_blktrans_dev, list);
  if (new->devnum == -1)
  {
   /* Use first free number */
   if (d->devnum != last_devnum+1) {
    /* Found a free devnum. Plug it in here */
    new->devnum = last_devnum+1;
    list_add_tail(&new->list, &d->list);
    goto added;
   }
  }
  else if (d->devnum == new->devnum)
  {
   /* Required number taken */
   return -EBUSY;
  }
  else if (d->devnum > new->devnum)
  {
   /* Required number was free */
   list_add_tail(&new->list, &d->list);
   goto added;
  }
  last_devnum = d->devnum;
 }
 if (new->devnum == -1)
  new->devnum = last_devnum+1;
 if ((new->devnum << tr->part_bits) > 256) {
  return -EBUSY;
 }
 init_MUTEX(&new->sem);
 //链接到MTD翻译层结构
 list_add_tail(&new->list, &tr->devs);
 added:
 if (!tr->writesect)
  new->readonly = 1;
 //分配一个通用磁盘结构。
 gd = alloc_disk(1 << tr->part_bits);
 if (!gd) {
  list_del(&new->list);
  return -ENOMEM;
 }
 gd->major = tr->major;
 gd->first_minor = (new->devnum) << tr->part_bits;
 //?????
 gd->fops = &mtd_blktrans_ops;
 
 snprintf(gd->disk_name, sizeof(gd->disk_name),
   "%s%c", tr->name, (tr->part_bits?'a':'0') + new->devnum);
 snprintf(gd->devfs_name, sizeof(gd->devfs_name),
   "%s/%c", tr->name, (tr->part_bits?'a':'0') + new->devnum);
 /* 2.5 has capacity in units of 512 bytes while still
    having BLOCK_SIZE_BITS set to 10. Just to keep us amused. */
 set_capacity(gd, (new->size * new->blksize) >> 9);
 gd->private_data = new;
 new->blkcore_priv = gd;
 gd->queue = tr->blkcore_priv->rq;
 if (new->readonly)
  set_disk_ro(gd, 1);
 add_disk(gd);
 
 return 0;
}
void add_disk(struct gendisk *disk)
{
 disk->flags |= GENHD_FL_UP;
 //注册到块设备位图,这里是关键,当打开块设备时就从这个位图里找,也是这里是上与上的连接点。
 blk_register_region(MKDEV(disk->major, disk->first_minor),disk->minors, NULL, exact_match, exact_lock, disk);
 register_disk(disk);
 blk_register_queue(disk);
}
/*
 * Enter a range of block device numbers into bdev_map.
 *
 * All arguments are passed straight through to kobj_map():
 * @dev:    first device number of the range
 * @range:  number of consecutive device numbers covered
 * @module: owning module (add_disk() passes NULL)
 * @probe:  callback stored with the range
 * @lock:   callback stored with the range
 * @data:   opaque pointer stored with the range (add_disk() passes the gendisk)
 */
void blk_register_region(dev_t dev, unsigned long range, struct module *module,
    struct kobject *(*probe)(dev_t, int *, void *),
    int (*lock)(dev_t, void *), void *data)
{
 kobj_map(bdev_map, dev, range, module, probe, lock, data);
}
/*
 * Make a gendisk visible: add its kobject (sysfs), create the sysfs
 * symlinks, emit the hotplug event and create the devfs entry.
 *
 * Disks with exactly one minor cannot hold partitions, so for them the
 * function stops after creating the devfs node. Otherwise it adds the
 * whole-disk devfs handle and, if the disk has a non-zero capacity, marks
 * the whole-disk block_device as invalidated and opens/closes it once
 * (presumably so the open path rescans the partition table -- see the
 * bd_invalidated check in do_open()).
 *
 * Errors are not reported to the caller; the function just returns early.
 */
void register_disk(struct gendisk *disk)
{
 struct block_device *bdev;
 char *s;
 int err;
 strlcpy(disk->kobj.name,disk->disk_name,KOBJ_NAME_LEN);
 /* some of these names contain '/'; replace the first one with '!' */
 s = strchr(disk->kobj.name, '/');
 if (s)
  *s = '!';
 // add the disk's kobject (this creates its sysfs entry)
 if ((err = kobject_add(&disk->kobj)))
  return;
 disk_sysfs_symlinks(disk);
 kobject_hotplug(&disk->kobj, KOBJ_ADD);
 /* No minors to use for partitions */
 printk("#1--------------register_disk disk->minors=%d\n",disk->minors);
 if (disk->minors == 1) {
  if (disk->devfs_name[0] != '\0')
   // create the block device node in devfs; for a one-minor disk
   // this completes the creation work
   devfs_add_disk(disk);
  return;
 }
 
 printk("#2--------------register_disk disk->minors=%d\n",disk->minors);
 /* always add handle for the whole disk */
 devfs_add_partitioned(disk);
 /* No such device (e.g., media were just removed) */
 if (!get_capacity(disk))
  return;
 bdev = bdget_disk(disk, 0);
 if (!bdev)
  return;
 bdev->bd_invalidated = 1;
 if (blkdev_get(bdev, FMODE_READ, 0) < 0)
  return;
 blkdev_put(bdev);
}
到此为止在dev文件系统下已经创建了结点,现在可以打开设备了。
二、块设备的打开。
先做一下准备工作。
/*
 * One-time initialisation of the generic block device layer, run as a
 * subsys initcall. Creates the block device map (bdev_map), calls
 * blk_dev_init() and registers block_subsys. Always returns 0.
 */
static int __init genhd_device_init(void)
{
 // create the block device map used by blk_register_region()/get_gendisk()
 bdev_map = kobj_map_init(base_probe, &block_subsys_sem);
 // block-layer initialisation (author's note: allocates the slab caches
 // for block device related structures)
 blk_dev_init();
 printk("###genhd_device_init %s\n",block_subsys.kset.kobj.name);
 // register the block subsystem (author's note: creates /sys/block)
 subsystem_register(&block_subsys);
 return 0;
}
subsys_initcall(genhd_device_init);
/*
 * Set up the VFS caches at boot (author's note: called from start_kernel).
 *
 * Creates the "names_cache" and "filp" slab caches, then initialises the
 * dentry, inode, file and mount tables with a page budget, and finally the
 * block device and character device infrastructure.
 *
 * @mempages: number of memory pages to base the hash sizes on; a reserve
 *            is subtracted before it is passed to the *_init() helpers.
 */
void __init vfs_caches_init(unsigned long mempages)
{
 unsigned long reserve;
 /* Base hash sizes on available memory, with a reserve equal to
           150% of current kernel size */
 reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
 mempages -= reserve;
 names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
   SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
   SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);
 dcache_init(mempages);
 inode_init(mempages);
 files_init(mempages);
 mnt_init(mempages);
 /* sets up the bdev pseudo filesystem */
 bdev_cache_init();
 chrdev_init();
}

/*
 * Object stored in bdev_cache: a block_device and its pseudo-filesystem
 * inode allocated together as one unit.
 */
struct bdev_inode {
 // the block device structure
 struct block_device bdev;
 // its inode in the block device pseudo filesystem
 struct inode vfs_inode;
};
/*
 * File system type of the block device pseudo filesystem, named "bdev".
 */
static struct file_system_type bd_type = {
 .name  = "bdev",
 .get_sb  = bd_get_sb,
 .kill_sb = kill_anon_super,
};
// mount of the block device pseudo filesystem (set in bdev_cache_init())
static struct vfsmount *bd_mnt;
// superblock of the block device pseudo filesystem (set in bdev_cache_init())
struct super_block *blockdev_superblock;
/*
 * Boot-time setup of the block device pseudo filesystem.
 *
 * Creates the "bdev_cache" slab cache (objects are struct bdev_inode),
 * registers the "bdev" file system type and mounts it internally with
 * kern_mount(), i.e. a vfsmount is created without a real mount point.
 * The resulting mount and superblock are kept in bd_mnt and
 * blockdev_superblock. Any failure panics.
 */
void __init bdev_cache_init(void)
{
 int rc;

 bdev_cachep = kmem_cache_create("bdev_cache",
   sizeof(struct bdev_inode), 0,
   SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC,
   init_once, NULL);

 /* Register the block device file system type. */
 rc = register_filesystem(&bd_type);
 if (rc != 0)
  panic("Cannot register bdev pseudo-fs");

 /* Internal mount: a vfsmount, but no real mount point. */
 bd_mnt = kern_mount(&bd_type);
 rc = PTR_ERR(bd_mnt);
 if (IS_ERR(bd_mnt))
  panic("Cannot create bdev pseudo-fs");

 /* For writeback */
 blockdev_superblock = bd_mnt->mnt_sb;
}
这里还为块设备注册了一个伪文件系统;从struct bdev_inode可以看出,分配一个I节点时就同时分配了struct block_device。
相关结构。
/*
 * Describes one opened (or openable) block device or partition as seen
 * from the VFS side; it is looked up by device number and embedded in a
 * struct bdev_inode.
 */
struct block_device
{
 // device number of the block device
 dev_t   bd_dev;  /* not a kdev_t - it's a search key */
 // inode in the block device pseudo filesystem
 struct inode *  bd_inode; /* will die */
 // open count (incremented in do_open())
 int   bd_openers;
 struct semaphore bd_sem; /* open/close mutex */
 struct semaphore bd_mount_sem; /* mount mutex */
 // list of all inodes that refer to this block device
 struct list_head bd_inodes;
 void *   bd_holder;
 int   bd_holders;
 // whole-disk device: do_open() sets it to the device itself when the
 // whole disk is opened, or to the containing disk for a partition
 struct block_device * bd_contains;
 // block size
 unsigned  bd_block_size;
 
 // partition description, set in do_open() when a partition is opened
 struct hd_struct * bd_part;
 /* number of times partitions within this device have been opened. */
 unsigned  bd_part_count;
 // non-zero makes do_open() call rescan_partitions()
 int   bd_invalidated;
 // generic disk this device belongs to
 struct gendisk * bd_disk;
 // link in the list of all block devices (all_bdevs)
 struct list_head bd_list;
 struct backing_dev_info *bd_inode_backing_dev_info;
 /*
  * Private data.  You must have bd_claim'ed the block_device
  * to use this.  NOTE:  bd_claim allows an owner to claim
  * the same device multiple times, the owner must take special
  * care to not mess up bd_private for that case.
  */
 unsigned long  bd_private;
};
/*
 * Describes one disk as seen from the driver side: its device numbers,
 * name, partitions, operations and request queue.
 */
struct gendisk {
 // major device number of the block device
 int major;   /* major number of driver */
 // first minor device number
 int first_minor;
 // size of the minor number range
 int minors;                     /* maximum number of minors, =1 for
                                  * disks that can't be partitioned
                                  * (author's note: e.g. mtdblock0). */
 char disk_name[32];  /* name of major driver */
 struct hd_struct **part; /* [indexed by minor] */
 // block device operations of this disk
 struct block_device_operations *fops;
 // request queue
 struct request_queue *queue;
 // driver private pointer; for MTD disks add_mtd_blktrans_dev() stores
 // the struct mtd_blktrans_dev (translation-layer device) here
 void *private_data;
 sector_t capacity;
 int flags;
 // devfs name (author's note: e.g. "mtdblock/0")
 char devfs_name[64];  /* devfs crap */
 int number;   /* more of the same */
 struct device *driverfs_dev;
 struct kobject kobj;
 struct timer_rand_state *random;
 int policy;
 atomic_t sync_io;  /* RAID */
 unsigned long stamp, stamp_idle;
 int in_flight;
#ifdef CONFIG_SMP
 struct disk_stats *dkstats;
#else
 struct disk_stats dkstats;
#endif
};
这两个结构是对块设备不同层次的描述。
开始blkdev_open。
/*
 * open() handler for block device files.
 *
 * Forces O_LARGEFILE, finds or creates the block_device for the inode and
 * opens it through do_open(). When the file was opened with O_EXCL the
 * device is additionally claimed for this file; if that claim fails the
 * device is released again.
 *
 * Returns 0 on success, otherwise the error from do_open() or bd_claim().
 */
static int blkdev_open(struct inode * inode, struct file * filp)
{
 struct block_device *bdev;
 int err;

 printk("#-------------------------blkdev_open\n");

 /*
  * Preserve backwards compatibility and allow large file access
  * even if userspace doesn't ask for it explicitly. Some mkfs
  * binary needs it. We might want to drop this workaround
  * during an unstable branch.
  */
 filp->f_flags |= O_LARGEFILE;

 /* Look up or allocate the struct block_device for this inode. */
 bdev = bd_acquire(inode);

 err = do_open(bdev, filp);
 if (err != 0)
  return err;

 if (filp->f_flags & O_EXCL) {
  /* Exclusive open: claim the device, undo the open on failure. */
  err = bd_claim(bdev, filp);
  if (err != 0)
   blkdev_put(bdev);
 }
 return err;
}
/*
 * Return the block_device that belongs to a device-node inode.
 *
 * Fast path: the inode already points at a block_device and a reference on
 * that device's pseudo-filesystem inode can be grabbed. Otherwise the
 * block_device is looked up (or created) by device number with bdget() and
 * the inode is attached to it: i_bdev and i_mapping are redirected to the
 * block device and the inode is added to the device's bd_inodes list.
 *
 * Returns the block_device, or NULL if bdget() failed.
 */
static struct block_device *bd_acquire(struct inode *inode)
{
 struct block_device *bdev;
 spin_lock(&bdev_lock);
 bdev = inode->i_bdev;
 // already associated with a block device (e.g. opened before)?
 if (bdev && igrab(bdev->bd_inode)) {
  spin_unlock(&bdev_lock);
  return bdev;
 }
 spin_unlock(&bdev_lock);
 // find or allocate the block_device in the block device pseudo
 // filesystem, keyed by device number
 bdev = bdget(inode->i_rdev);
 if (bdev) {
  spin_lock(&bdev_lock);
  /* drop a stale association before installing the new one */
  if (inode->i_bdev)
   __bd_forget(inode);
  inode->i_bdev = bdev;
  inode->i_mapping = bdev->bd_inode->i_mapping;
  list_add(&inode->i_devices, &bdev->bd_inodes);
  spin_unlock(&bdev_lock);
 }
 return bdev;
}
/*
 * Find or create the block_device for a device number.
 *
 * The lookup goes through the inode cache of the bdev pseudo filesystem
 * (iget5_locked on bd_mnt's superblock). When the inode is new (I_NEW),
 * the embedded block_device and the inode are initialised, the device is
 * added to all_bdevs and the inode is unlocked.
 *
 * @dev: device number to look up
 *
 * Returns the block_device, or NULL if no inode could be obtained.
 */
struct block_device *bdget(dev_t dev)
{
 struct block_device *bdev;
 struct inode *inode;
 inode = iget5_locked(bd_mnt->mnt_sb, hash(dev),
   bdev_test, bdev_set, &dev);
 if (!inode)
  return NULL;
 // get the block_device that is embedded next to this inode
 bdev = &BDEV_I(inode)->bdev;
 // freshly allocated inode: initialise both halves
 if (inode->i_state & I_NEW) {
  bdev->bd_contains = NULL;
  bdev->bd_inode = inode;
  bdev->bd_block_size = (1 << inode->i_blkbits);
  bdev->bd_part_count = 0;
  bdev->bd_invalidated = 0;
  inode->i_mode = S_IFBLK;
  inode->i_rdev = dev;
  inode->i_bdev = bdev;
  inode->i_data.a_ops = &def_blk_aops;
  mapping_set_gfp_mask(&inode->i_data, GFP_USER);
  inode->i_data.backing_dev_info = &default_backing_dev_info;
  spin_lock(&bdev_lock);
  list_add(&bdev->bd_list, &all_bdevs);
  spin_unlock(&bdev_lock);
  unlock_new_inode(inode);
 }
 return bdev;
}
/*
 * Look up an inode in the inode hash table, creating it if it is missing.
 *
 * @sb:      superblock the inode belongs to
 * @hashval: hash value used to pick the hash chain
 * @test:    comparison callback passed to the lookup
 * @set:     initialisation callback passed to the allocation
 * @data:    opaque argument for @test and @set
 *
 * Returns the inode found by ifind(), or otherwise whatever
 * get_new_inode() returns.
 */
struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
  int (*test)(struct inode *, void *),
  int (*set)(struct inode *, void *), void *data)
{
 struct hlist_head *bucket = inode_hashtable + hash(sb, hashval);
 struct inode *found;

 /* Try the cache first. */
 found = ifind(sb, bucket, test, data, 1);
 if (!found) {
  /*
   * Not cached: allocate a new inode. get_new_inode() will do the
   * right thing, re-trying the search in case it had to block at
   * any point. For block devices the inode comes from the bdev
   * pseudo filesystem.
   */
  found = get_new_inode(sb, bucket, test, set, data);
 }
 return found;
}
/*
 * Core of opening a block device.
 *
 * Looks up the gendisk for bdev->bd_dev and, under the big kernel lock and
 * bdev->bd_sem, handles four cases:
 *   - first open of a whole disk: call the driver's open(), set the device
 *     size and backing_dev_info, rescan partitions if invalidated;
 *   - first open of a partition: open the whole disk via blkdev_get(),
 *     validate the partition and set the size from its sector count;
 *   - repeated open of a whole disk: drop the extra disk/module references,
 *     call the driver's open() again, rescan partitions if invalidated;
 *   - repeated open of a partition: bump the containing disk's bd_part_count.
 * On success bd_openers is incremented.
 *
 * @bdev: block device to open
 * @file: file being opened; its f_mapping is pointed at the device's mapping
 *
 * Returns 0 on success. Returns -ENXIO when no gendisk exists or the
 * partition is unusable, -ENOMEM when the whole-disk device cannot be
 * obtained, or the error from the driver's open() / blkdev_get(). On
 * failure the reference on bdev is dropped with bdput().
 */
static int do_open(struct block_device *bdev, struct file *file)
{
 struct module *owner = NULL;
 struct gendisk *disk;
 int ret = -ENXIO;
 int part;
 file->f_mapping = bdev->bd_inode->i_mapping;
 lock_kernel();
 // look up the struct gendisk for this device number; 'part' receives
 // the partition number (0 is treated as the whole disk below)
 disk = get_gendisk(bdev->bd_dev, &part);
 if (!disk) {
  unlock_kernel();
  bdput(bdev);
  return ret;
 }
 owner = disk->fops->owner;
 down(&bdev->bd_sem);
 if (!bdev->bd_openers)
 {
  /* first opener of this block_device */
  bdev->bd_disk = disk;
  bdev->bd_contains = bdev;
  if (!part)
  {
   /* whole disk */
   struct backing_dev_info *bdi;
   if (disk->fops->open)
   {
    ret = disk->fops->open(bdev->bd_inode, file);
    if (ret)
     goto out_first;
   }
   if (!bdev->bd_openers) {
    /* capacity is in 512-byte sectors; convert to bytes */
    bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
    bdi = blk_get_backing_dev_info(bdev);
    if (bdi == NULL)
     bdi = &default_backing_dev_info;
    bdev->bd_inode->i_data.backing_dev_info = bdi;
   }
   if (bdev->bd_invalidated)
    rescan_partitions(disk, bdev);
  }
  else
  {
   /* partition: the whole disk has to be opened first */
   struct hd_struct *p;
   struct block_device *whole;
   whole = bdget_disk(disk, 0);
   ret = -ENOMEM;
   if (!whole)
    goto out_first;
   ret = blkdev_get(whole, file->f_mode, file->f_flags);
   if (ret)
    goto out_first;
   bdev->bd_contains = whole;
   down(&whole->bd_sem);
   whole->bd_part_count++;
   p = disk->part[part - 1];
   bdev->bd_inode->i_data.backing_dev_info =
      whole->bd_inode->i_data.backing_dev_info;
   /* reject a disk that is not up or an absent/empty partition */
   if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) {
    whole->bd_part_count--;
    up(&whole->bd_sem);
    ret = -ENXIO;
    goto out_first;
   }
   kobject_get(&p->kobj);
   bdev->bd_part = p;
   bd_set_size(bdev, (loff_t) p->nr_sects << 9);
   up(&whole->bd_sem);
  }
 }
 else
 {
  /* already open: the references taken by get_gendisk() are not needed */
  put_disk(disk);
  module_put(owner);
  if (bdev->bd_contains == bdev)
  {
   if (bdev->bd_disk->fops->open)
   {
    ret = bdev->bd_disk->fops->open(bdev->bd_inode, file);
    if (ret)
     goto out;
   }
   if (bdev->bd_invalidated)
    rescan_partitions(bdev->bd_disk, bdev);
  }
  else
  {
   down(&bdev->bd_contains->bd_sem);
   bdev->bd_contains->bd_part_count++;
   up(&bdev->bd_contains->bd_sem);
  }
 }
 bdev->bd_openers++;
 up(&bdev->bd_sem);
 unlock_kernel();
 return 0;
out_first:
 /* undo the first-open setup */
 bdev->bd_disk = NULL;
 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 if (bdev != bdev->bd_contains)
  blkdev_put(bdev->bd_contains);
 bdev->bd_contains = NULL;
 put_disk(disk);
 module_put(owner);
out:
 up(&bdev->bd_sem);
 unlock_kernel();
 if (ret)
  bdput(bdev);
 return ret;
}
/*
 * Map a device number to its gendisk.
 *
 * This is where the open path meets the registration path: the lookup uses
 * the same bdev_map that blk_register_region() fills in.
 *
 * @dev:  device number to look up
 * @part: passed to kobj_lookup(), which reports through it which
 *        partition of the disk @dev refers to
 *
 * Returns the gendisk, or NULL when nothing is registered for @dev.
 */
struct gendisk *get_gendisk(dev_t dev, int *part)
{
 struct kobject *kobj;

 kobj = kobj_lookup(bdev_map, dev, part);
 if (!kobj)
  return NULL;
 return to_disk(kobj);
}
 
 
 
 

 
阅读(1817) | 评论(0) | 转发(0) |
0

上一篇:(7)linux字符设备

下一篇:loader启动过程

给主人留下些什么吧!~~