Chinaunix首页 | 论坛 | 博客
  • 博客访问: 5606989
  • 博文数量: 922
  • 博客积分: 19333
  • 博客等级: 上将
  • 技术积分: 11226
  • 用 户 组: 普通用户
  • 注册时间: 2007-03-27 14:33
文章分类

全部博文(922)

文章存档

2023年(1)

2020年(2)

2019年(1)

2017年(1)

2016年(3)

2015年(10)

2014年(17)

2013年(49)

2012年(291)

2011年(266)

2010年(95)

2009年(54)

2008年(132)

分类: LINUX

2011-11-21 14:48:55


参考:http://hi.baidu.com/rwen2012/blog/item/9b46fb3e4c22c1fb828b13dc.html

  1. /*
  2. * ramdisk.c - Multiple RAM disk driver - gzip-loading version - v. 0.8 beta.
  3. *
  4. * (C) Chad Page, Theodore Ts'o, et. al, 1995.
  5. *
  6. * This RAM disk is designed to have filesystems created on it and mounted
  7. * just like a regular floppy disk.
  8. *
  9. * It also does something suggested by Linus: use the buffer cache as the
  10. * RAM disk data. This makes it possible to dynamically allocate the RAM disk
  11. * buffer - with some consequences I have to deal with as I write this.
  12. *
  13. * This code is based on the original ramdisk.c, written mostly by
  14. * Theodore Ts'o (TYT) in 1991. The code was largely rewritten by
  15. * Chad Page to use the buffer cache to store the RAM disk data in
  16. * 1995; Theodore then took over the driver again, and cleaned it up
  17. * for inclusion in the mainline kernel.
  18. *
  19. * The original CRAMDISK code was written by Richard Lyons, and
  20. * adapted by Chad Page to use the new RAM disk interface. Theodore
  21. * Ts'o rewrote it so that both the compressed RAM disk loader and the
  22. * kernel decompressor uses the same inflate.c codebase. The RAM disk
  23. * loader now also loads into a dynamic (buffer cache based) RAM disk,
  24. * not the old static RAM disk. Support for the old static RAM disk has
  25. * been completely removed.
  26. *
  27. * Loadable module support added by Tom Dyas.
  28. *
  29. */

  30. #include <linux/string.h>
  31. #include <linux/slab.h>
  32. #include <asm/atomic.h>
  33. #include <linux/bio.h>
  34. #include <linux/module.h>
  35. #include <linux/moduleparam.h>
  36. #include <linux/init.h>
  37. #include <linux/pagemap.h>
  38. #include <linux/blkdev.h>
  39. #include <linux/genhd.h>
  40. #include <linux/buffer_head.h> /* for invalidate_bdev() */
  41. #include <linux/backing-dev.h>
  42. #include <linux/blkpg.h>
  43. #include <linux/writeback.h>

  44. #include <asm/uaccess.h>

  45. /* Various static variables go here. Most are used only in the RAM disk code.
  46. */

  47. static struct gendisk *rd_disks[CONFIG_BLK_DEV_RAM_COUNT]; //the default value is 16
  48. static struct block_device *rd_bdev[CONFIG_BLK_DEV_RAM_COUNT];/* Protected device data */
  49. static struct request_queue *rd_queue[CONFIG_BLK_DEV_RAM_COUNT];

  50. /*
  51. * Parameters for the boot-loading of the RAM disk. These are set by
  52. * init/main.c (from arguments to the kernel command line) or from the
  53. * architecture-specific setup routine (from the stored boot sector
  54. * information).
  55. */
  56. int rd_size = CONFIG_BLK_DEV_RAM_SIZE; // 4M /* Size of the RAM disks */
  57. /*
  58. * It would be very desirable to have a soft-blocksize (that in the case
  59. * of the ramdisk driver is also the hardblocksize ;) of PAGE_SIZE because
  60. * doing that we'll achieve a far better MM footprint. Using a rd_blocksize of
  61. * BLOCK_SIZE in the worst case we'll make PAGE_SIZE/BLOCK_SIZE buffer-pages
  62. * unfreeable. With a rd_blocksize of PAGE_SIZE instead we are sure that only
  63. * 1 page will be protected. Depending on the size of the ramdisk you
  64. * may want to change the ramdisk blocksize to achieve a better or worse MM
  65. * behaviour. The default is still BLOCK_SIZE (needed by rd_load_image that
  66. * supposes the filesystem in the image uses a BLOCK_SIZE blocksize).
  67. */
  68. static int rd_blocksize = CONFIG_BLK_DEV_RAM_BLOCKSIZE; //the block size

  69. /*
  70. * Copyright (C) 2000 Linus Torvalds.
  71. * 2000 Transmeta Corp.
  72. * aops copied from ramfs.
  73. */

  74. /*
  75. * If a ramdisk page has buffers, some may be uptodate and some may be not.
  76. * To bring the page uptodate we zero out the non-uptodate buffers. The
  77. * page must be locked.
  78. */
  79. static void make_page_uptodate(struct page *page)
  80. {
  81.    if (page_has_buffers(page)) {
  82.        struct buffer_head *bh = page_buffers(page);
  83.        struct buffer_head *head = bh;

  84.        do {
  85.            if (!buffer_uptodate(bh)) {
  86.                memset(bh->b_data, 0, bh->b_size);
  87.                /*
  88.                * akpm: I'm totally undecided about this. The
  89.                * buffer has just been magically brought "up to
  90.                * date", but nobody should want to be reading
  91.                * it anyway, because it hasn't been used for
  92.                * anything yet. It is still in a "not read
  93.                * from disk yet" state.
  94.                *
  95.                * But non-uptodate buffers against an uptodate
  96.                * page are against the rules. So do it anyway.
  97.                */
  98.                set_buffer_uptodate(bh);
  99.            }
  100.        } while ((bh = bh->b_this_page) != head);
  101.    } else {
  102.        memset(page_address(page), 0, PAGE_CACHE_SIZE);
  103.    }
  104.    flush_dcache_page(page);
  105.    SetPageUptodate(page);
  106. }

  /*
   * ->readpage: there is nothing to read from, so a page that is not yet
   * uptodate is zero-filled via make_page_uptodate().  The page is unlocked
   * before returning.  Always returns 0.
   */
  static int ramdisk_readpage(struct file *file, struct page *page)
  {
      const int stale = !PageUptodate(page);

      if (stale) {
          make_page_uptodate(page);
      }
      unlock_page(page);

      return 0;
  }

  /*
   * ->prepare_write: make sure the page is uptodate before it is written to.
   * @offset and @to are not used.  Always returns 0.
   */
  static int ramdisk_prepare_write(struct file *file, struct page *page,
                  unsigned offset, unsigned to)
  {
      const int stale = !PageUptodate(page);

      if (stale) {
          make_page_uptodate(page);
      }

      return 0;
  }

  /*
   * ->commit_write: the only work after a write is to mark the page dirty.
   * @file, @offset and @to are not used.  Always returns 0.
   */
  static int ramdisk_commit_write(struct file *file, struct page *page,
                  unsigned offset, unsigned to)
  {
      set_page_dirty(page);

      return 0;
  }

  127. /*
  128. * ->writepage to the blockdev's mapping has to redirty the page so that the
  129. * VM doesn't go and steal it. We return AOP_WRITEPAGE_ACTIVATE so that the VM
  130. * won't try to (pointlessly) write the page again for a while.
  131. *
  132. * Really, these pages should not be on the LRU at all.
  133. */
  134. static int ramdisk_writepage(struct page *page, struct writeback_control *wbc)
  135. {
  136.    if (!PageUptodate(page))
  137.        make_page_uptodate(page);
  138.    SetPageDirty(page);
  139.    if (wbc->for_reclaim)
  140.        return AOP_WRITEPAGE_ACTIVATE;
  141.    unlock_page(page);
  142.    return 0;
  143. }

  144. /*
  145. * This is a little speedup thing: short-circuit attempts to write back the
  146. * ramdisk blockdev inode to its non-existent backing store.
  147. */
  /*
   * ->writepages: deliberately a no-op that reports success, so writeback of
   * the blockdev inode is short-circuited.  Neither argument is used.
   */
  static int ramdisk_writepages(struct address_space *mapping,
                  struct writeback_control *wbc)
  {
      const int nothing_to_write = 0;

      return nothing_to_write;
  }

  153. /*
  154. * ramdisk blockdev pages have their own ->set_page_dirty() because we don't
  155. * want them to contribute to dirty memory accounting.
  156. */
  /*
   * ->set_page_dirty: set the dirty flag directly on the page.
   * Returns 1 when the page was clean before the call, 0 when it was
   * already dirty.
   */
  static int ramdisk_set_page_dirty(struct page *page)
  {
      return TestSetPageDirty(page) ? 0 : 1;
  }


  163. //块设备的address_space_operations结构,注意与文件系统的相应结果的区别。
  164. static const struct address_space_operations ramdisk_aops = {
  165.    .readpage = ramdisk_readpage,
  166.    .prepare_write = ramdisk_prepare_write,
  167.    .commit_write = ramdisk_commit_write,
  168.    .writepage = ramdisk_writepage,
  169.    .set_page_dirty = ramdisk_set_page_dirty,
  170.    .writepages = ramdisk_writepages,
  171. };

  172. static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector,
  173.                struct address_space *mapping)
  174. {
  175.    pgoff_t index = sector >> (PAGE_CACHE_SHIFT - 9);
  176.    unsigned int vec_offset = vec->bv_offset;
  177.    int offset = (sector << 9) & ~PAGE_CACHE_MASK;
  178.    int size = vec->bv_len;
  179.    int err = 0;

  180.    do {
  181.        int count;
  182.        struct page *page;
  183.        char *src;
  184.        char *dst;

  185.        count = PAGE_CACHE_SIZE - offset;
  186.        if (count > size)
  187.            count = size;
  188.        size -= count;

  189.        //从页面缓存中读,如果没有,则出错,而不像对于ide-disk的处理,它出错会导致从磁盘中读入。
  190.        page = grab_cache_page(mapping, index);
  191.        if (!page) {
  192.            err = -ENOMEM;
  193.            goto out;
  194.        }

  195.        if (!PageUptodate(page))
  196.            make_page_uptodate(page);

  197.        index++;

  198.        //建立映射
  199.        if (rw == READ) {
  200.            src = kmap_atomic(page, KM_USER0) + offset;
  201.            dst = kmap_atomic(vec->bv_page, KM_USER1) + vec_offset;
  202.        } else {
  203.            src = kmap_atomic(vec->bv_page, KM_USER0) + vec_offset;
  204.            dst = kmap_atomic(page, KM_USER1) + offset;
  205.        }
  206.        offset = 0;
  207.        vec_offset += count;

  208.        memcpy(dst, src, count); //拷贝

  209.        //临时映射必须在使用完后立即解除映射
  210.        kunmap_atomic(src, KM_USER0);
  211.        kunmap_atomic(dst, KM_USER1);

  212.        if (rw == READ)
  213.            flush_dcache_page(vec->bv_page);
  214.        else
  215.            set_page_dirty(page); //使页面不可被释放
  216.        unlock_page(page);
  217.        put_page(page);
  218.    } while (size);

  219. out:
  220.    return err;
  221. }

  222. /*
  223. * Basically, my strategy here is to set up a buffer-head which can't be
  224. * deleted, and make that my Ramdisk. If the request is outside of the
  225. * allocated size, we must get rid of it...
  226. *
  227. * 19-JAN-1998 Richard Gooch <rgooch@atnf.csiro.au> Added devfs support
  228. *
  229. */
  230. //这是个处理BIO请求的函数,对于IDE-DISK相应的函数是通过IO-scheduler将待处理的BIO请求加入到请求队列中,或将这个BIO合并到合适的请求(request)中,在做下一步的处理。而现在对于ramdisk这个随机访问设备,IO-scheduler则不必要,所以这个函数直接处理的BIO请求。

  231. static int rd_make_request(request_queue_t *q, struct bio *bio)
  232. {
  233.    struct block_device *bdev = bio->bi_bdev;
  234.    struct address_space * mapping = bdev->bd_inode->i_mapping;
  235.    sector_t sector = bio->bi_sector;
  236.    unsigned long len = bio->bi_size >> 9;
  237.    int rw = bio_data_dir(bio);
  238.    struct bio_vec *bvec;
  239.    int ret = 0, i;

  240.    if (sector + len > get_capacity(bdev->bd_disk))
  241.        goto fail;

  242.    if (rw==READA) //预备读处理
  243.        rw=READ;

  244.    bio_for_each_segment(bvec, bio, i) { //处理BIO中的每一个segment。
  245.        ret |= rd_blkdev_pagecache_IO(rw, bvec, sector, mapping);
  246.        sector += bvec->bv_len >> 9;
  247.    }
  248.    if (ret)
  249.        goto fail;

  250.    bio_endio(bio, bio->bi_size, 0);
  251.    return 0;
  252. fail:
  253.    bio_io_error(bio, bio->bi_size);
  254.    return 0;
  255. }

  256. static int rd_ioctl(struct inode *inode, struct file *file,
  257.            unsigned int cmd, unsigned long arg)
  258. {
  259.    int error;
  260.    struct block_device *bdev = inode->i_bdev;

  261.    if (cmd != BLKFLSBUF)
  262.        return -ENOTTY;

  263.    /*
  264.    * special: we want to release the ramdisk memory, it's not like with
  265.    * the other blockdevices where this ioctl only flushes away the buffer
  266.    * cache
  267.    */
  268.    error = -EBUSY;
  269.    mutex_lock(&bdev->bd_mutex);
  270.    if (bdev->bd_openers <= 2) {
  271.        truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
  272.        error = 0;
  273.    }
  274.    mutex_unlock(&bdev->bd_mutex);
  275.    return error;
  276. }

  277. /*
  278. * This is the backing_dev_info for the blockdev inode itself. It doesn't need
  279. * writeback and it does not contribute to dirty memory accounting.
  280. */
  281. static struct backing_dev_info rd_backing_dev_info = {
  282.    .ra_pages = 0, /* No readahead */
  283.    .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | BDI_CAP_MAP_COPY,
  284.    .unplug_io_fn = default_unplug_io_fn,
  285. };

  286. /*
  287. * This is the backing_dev_info for the files which live atop the ramdisk
  288. * "device". These files do need writeback and they do contribute to dirty
  289. * memory accounting.
  290. */
  291. static struct backing_dev_info rd_file_backing_dev_info = {
  292.    .ra_pages = 0, /* No readahead */
  293.    .capabilities = BDI_CAP_MAP_COPY, /* Does contribute to dirty memory */
  294.    .unplug_io_fn = default_unplug_io_fn,
  295. };

  296. static int rd_open(struct inode *inode, struct file *filp)
  297. {
  298.    unsigned unit = iminor(inode);

  299.    if (rd_bdev[unit] == NULL) {
  300.        struct block_device *bdev = inode->i_bdev;
  301.        struct address_space *mapping;
  302.        unsigned bsize;
  303.        gfp_t gfp_mask;

  304.        inode = igrab(bdev->bd_inode);
  305.        rd_bdev[unit] = bdev;
  306.        bdev->bd_openers++;
  307.        bsize = bdev_hardsect_size(bdev);
  308.        bdev->bd_block_size = bsize;
  309.        inode->i_blkbits = blksize_bits(bsize);
  310.        inode->i_size = get_capacity(bdev->bd_disk)<<9;

  311.        mapping = inode->i_mapping;
  312.        mapping->a_ops = &ramdisk_aops;
  313.        mapping->backing_dev_info = &rd_backing_dev_info;
  314.        bdev->bd_inode_backing_dev_info = &rd_file_backing_dev_info;

  315.        /*
  316.        * Deep badness. rd_blkdev_pagecache_IO() needs to allocate
  317.        * pagecache pages within a request_fn. We cannot recur back
  318.        * into the filesytem which is mounted atop the ramdisk, because
  319.        * that would deadlock on fs locks. And we really don't want
  320.        * to reenter rd_blkdev_pagecache_IO when we're already within
  321.        * that function.
  322.        *
  323.        * So we turn off __GFP_FS and __GFP_IO.
  324.        *
  325.        * And to give this thing a hope of working, turn on __GFP_HIGH.
  326.        * Hopefully, there's enough regular memory allocation going on
  327.        * for the page allocator emergency pools to keep the ramdisk
  328.        * driver happy.
  329.        */
  330.        gfp_mask = mapping_gfp_mask(mapping);
  331.        gfp_mask &= ~(__GFP_FS|__GFP_IO);
  332.        gfp_mask |= __GFP_HIGH;
  333.        mapping_set_gfp_mask(mapping, gfp_mask);
  334.    }

  335.    return 0;
  336. }

  337. static struct block_device_operations rd_bd_op = {
  338.    .owner = THIS_MODULE,
  339.    .open = rd_open,
  340.    .ioctl = rd_ioctl,
  341. };

  342. /*
  343. * Before freeing the module, invalidate all of the protected buffers.
  344. */
  345. static void __exit rd_cleanup(void)
  346. {
  347.    int i;

  348.    for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) {
  349.        struct block_device *bdev = rd_bdev[i];
  350.        rd_bdev[i] = NULL;
  351.        if (bdev) {
  352.            invalidate_bdev(bdev, 1);
  353.            blkdev_put(bdev);
  354.        }
  355.        del_gendisk(rd_disks[i]);
  356.        put_disk(rd_disks[i]);
  357.        blk_cleanup_queue(rd_queue[i]);
  358.    }
  359.    unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
  360. }

  361. /*
  362. * This is the registration and initialization section of the RAM disk driver
  363. */
  364. static int __init rd_init(void)
  365. {
  366.    int i;
  367.    int err = -ENOMEM;

  368.    //检查块大小是否合适,它必须小于一个内存页面的大小,且要大于512字节,还要是2的N次幂。
  369.    if (rd_blocksize > PAGE_SIZE || rd_blocksize < 512 ||
  370.            (rd_blocksize & (rd_blocksize-1))) {
  371.        printk("RAMDISK: wrong blocksize %d, reverting to defaults\n",
  372.                rd_blocksize);
  373.        rd_blocksize = BLOCK_SIZE; //如果不是则使用默认值
  374.    }

  375.    for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) {
  376.        rd_disks[i] = alloc_disk(1); //分配硬盘描述符
  377.        if (!rd_disks[i])
  378.            goto out;

  379.        rd_queue[i] = blk_alloc_queue(GFP_KERNEL); //分配请求队列描述符
  380.        if (!rd_queue[i]) {
  381.            put_disk(rd_disks[i]);
  382.            goto out;
  383.        }
  384.    }

  385.    //注册进内核
  386.    if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) {
  387.        err = -EIO;
  388.        goto out;
  389.    }

  390.    //初始化请求队列,并将磁盘加入到队列中去
  391.    for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) {
  392.        struct gendisk *disk = rd_disks[i];

  393.    //注册请求队列的BIO请求函数
  394.    blk_queue_make_request(rd_queue[i], &rd_make_request);
  395.        //盘块大小
  396. blk_queue_hardsect_size(rd_queue[i], rd_blocksize);

  397.        /* rd_size is given in kB */
  398.        //以下两行注册设备的主次号
  399.        disk->major = RAMDISK_MAJOR; //对于RAMDISK,主设备号都是这个
  400.        disk->first_minor = i; //次设备号标志是第几个盘
  401.        disk->fops = &rd_bd_op;
  402.        disk->queue = rd_queue[i]; //每个盘一个请求队列。
  403.        disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
  404.        sprintf(disk->disk_name, "ram%d", i);
  405.        set_capacity(disk, rd_size * 2);
  406.        add_disk(rd_disks[i]); //加入,至此,该盘可以被访问
  407.    }

  408.    /* rd_size is given in kB */
  409.    printk("RAMDISK driver initialized: "
  410.        "%d RAM disks of %dK size %d blocksize\n",
  411.        CONFIG_BLK_DEV_RAM_COUNT, rd_size, rd_blocksize);

  412.    return 0;
  413. out:
  414.    while (i--) {
  415.        put_disk(rd_disks[i]);
  416.        blk_cleanup_queue(rd_queue[i]);
  417.    }
  418.    return err;
  419. }

  /* Module entry and exit points. */
  module_init(rd_init);
  module_exit(rd_cleanup);

  422. /* options - nonmodular */
  423. #ifndef MODULE
  424. static int __init ramdisk_size(char *str)
  425. {
  426.    rd_size = simple_strtol(str,NULL,0);
  427.    return 1;
  428. }
  429. static int __init ramdisk_size2(char *str) /* kludge */
  430. {
  431.    return ramdisk_size(str);
  432. }
  433. static int __init ramdisk_blocksize(char *str)
  434. {
  435.    rd_blocksize = simple_strtol(str,NULL,0);
  436.    return 1;
  437. }
  438. __setup("ramdisk=", ramdisk_size); //为向前兼容而存在,等于ramdisk_size
  439. __setup("ramdisk_size=", ramdisk_size2);
  440. __setup("ramdisk_blocksize=", ramdisk_blocksize);
  441. #endif

  442. /* options - modular */
  443. module_param(rd_size, int, 0);
  444. MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
  445. module_param(rd_blocksize, int, 0);
  446. MODULE_PARM_DESC(rd_blocksize, "Blocksize of each RAM disk in bytes.");
  447. MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);

  448. MODULE_LICENSE("GPL");

阅读(2207) | 评论(0) | 转发(0) |
0

上一篇:通关

下一篇:放手

给主人留下些什么吧!~~