MTD块设备的数据读取-yqhminquan-ChinaUnix博客

yqhminquan的ChinaUnix博客

首页　| 　博文目录　| 　关于我

yqhminquan

博客访问： 184847
博文数量： 44
博客积分： 627
博客等级：中士
技术积分： 345
用户组：普通用户
注册时间： 2012-02-20 21:55

文章分类

全部博文（44）

嵌入式文件系统（2）
u-boot（0）
linux内核机制（2）
USB（0）
gcc编译相关（5）
linux设备驱动（35）

spi（1）

rtc（1）

nor（1）

nand（0）

LCD驱动（0）

触摸屏驱动（1）

串口驱动（1）

usb子系统（4）

MTD块设备（4）

input子系统（3）

声卡（1）

I2C（2）

块设备（4）

SD卡（4）

网络设备驱动（8）
未分配的博文（0）

文章存档

2012年（44）

我的朋友

相关博文

MTD块设备的数据读取

分类： LINUX

2012-08-08 17:21:03

MTD块设备的数据读取

2010-07-21 21:42:12| 分类： arm linux设备驱 | 标签： |字号大中小

二，数据读取

数据读取分为同步读do_sync_read和异步读generic_file_aio_read。

二者的区别在于：同步读要等数据读取结束后函数才返回；而异步读在函数返回时，数据读取可能尚未结束。同步读是通过调用异步读函数再加上等待函数来实现的。

/*
 * generic_file_aio_read - asynchronous read entry point (annotated excerpt).
 *
 * Validates the caller's iovec array, then walks it segment by segment and
 * hands every non-empty segment to do_generic_file_read(), passing
 * file_read_actor as the copy callback.
 *
 * @iocb:    I/O control block; supplies the file (ki_filp) and the file
 *           position (ki_pos) used for the read.
 * @iov:     array of destination buffers.
 * @nr_segs: number of entries in @iov.
 * @pos:     not referenced in the portion of the function shown here.
 *
 * Returns the result of generic_segment_checks() when that check fails.
 * Otherwise returns the sum of desc.written over the processed segments,
 * or desc.error when an error occurred and nothing had been transferred.
 */
ssize_t
generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
		unsigned long nr_segs, loff_t pos)
{
	struct file *filp = iocb->ki_filp;
	ssize_t retval;
	unsigned long seg;
	size_t count;
	loff_t *ppos = &iocb->ki_pos;

	count = 0;

	/*
	 * Check that the memory that will receive the data is valid.
	 * VERIFY_WRITE is used because the data being read is about to be
	 * written into these buffers.
	 */
	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
	if (retval)
		return retval;

	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */

	/*
	 * Background (article note): the page cache keeps on-disk data in
	 * memory to speed up file reads and writes.  In the common case
	 * read/write deal only with the cache (there are special cases,
	 * discussed below).
	 *
	 * read takes its data straight from the cache.  If the data is not
	 * cached yet, a disk read is triggered and the caller waits until the
	 * data has been brought into the cache.  write likewise just writes
	 * into the cache and is done; the kernel takes care of writing the
	 * data back to disk later.
	 *
	 * If O_DIRECT is set, data is transferred directly, bypassing the
	 * page cache.
	 */
	if (filp->f_flags & O_DIRECT) {
		/* ... O_DIRECT handling elided in the article ... */
	}

	for (seg = 0; seg < nr_segs; seg++) {
		/* read_descriptor_t: records the state of one read operation. */
		read_descriptor_t desc;

		desc.written = 0;
		desc.arg.buf = iov[seg].iov_base;
		desc.count = iov[seg].iov_len;
		if (desc.count == 0)
			continue;
		desc.error = 0;

		/* Pass the read request down to the lower-level function. */
		do_generic_file_read(filp, ppos, &desc, file_read_actor);

		retval += desc.written;
		if (desc.error) {
			/* Report the error only if nothing was read at all. */
			retval = retval ?: desc.error;
			break;
		}
		/* A short read on this segment ends the whole request. */
		if (desc.count > 0)
			break;
	}

out:
	return retval;
}

/*****************************************************************************/

在看函数do_generic_file_read前有必要先看看函数file_read_actor。

/*
 * file_read_actor - copy callback handed to do_generic_file_read() (excerpt).
 *
 * Only the central copy statement is shown; the lines consisting of a lone
 * "。" mark code the article left out (including the declarations of
 * 'left' and 'kaddr', which are not visible here).
 */
int file_read_actor(read_descriptor_t *desc, struct page *page,

unsigned long offset, unsigned long size)

{

。

/*
 * Most of the time a read or write is simply an access to the cache: if the
 * wanted data block is found in the cache it is copied directly to user
 * space.
 */

left = __copy_to_user(desc->arg.buf, kaddr + offset, size);

。

}

/*****************************************************************************/

/*
 * do_generic_file_read - read file data through the page cache.
 *
 * Walks the page cache page by page starting at *ppos.  For every page it
 * makes sure the page is present and up to date (triggering readahead or
 * ->readpage() when needed) and then calls @actor to copy the data out.
 *
 * @filp:  file being read; supplies the address_space (f_mapping) and the
 *         readahead state (f_ra).
 * @ppos:  in/out file position; rewritten from index/offset on exit.
 * @desc:  read descriptor; desc->count bounds the read and desc->error
 *         receives any error.
 * @actor: copy callback; its return value is added to the in-page offset.
 *
 * Returns nothing; results are reported through @desc and *@ppos.
 */
static void do_generic_file_read(struct file *filp, loff_t *ppos,
		read_descriptor_t *desc, read_actor_t actor)
{
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = mapping->host;
	struct file_ra_state *ra = &filp->f_ra;
	pgoff_t index;
	pgoff_t last_index;
	pgoff_t prev_index;
	unsigned long offset;      /* offset into pagecache page */
	unsigned int prev_offset;
	int error;

	/* Page index of the current position, i.e. which page holds the data. */
	index = *ppos >> PAGE_CACHE_SHIFT;
	prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
	/* Offset within the page of the previous position. */
	prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
	/* Page index at which the requested range ends (rounded up). */
	last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
	offset = *ppos & ~PAGE_CACHE_MASK;

	for (;;) {
		struct page *page;
		pgoff_t end_index;
		loff_t isize;
		unsigned long nr, ret;

		/*
		 * Check whether the current process has its reschedule flag
		 * set; if so, call schedule() once.
		 */
		cond_resched();
find_page:
		/* Look up the cache page for the current position. */
		page = find_get_page(mapping, index);
		if (!page) {
			/*
			 * Not found: the page cache holds no page for this
			 * index yet, so start a synchronous readahead.
			 */
			page_cache_sync_readahead(mapping,
					ra, filp,
					index, last_index - index);
			page = find_get_page(mapping, index);
			if (unlikely(page == NULL))
				goto no_cached_page;
		}
		if (PageReadahead(page)) {
			/* File readahead. */
			page_cache_async_readahead(mapping,
					ra, filp, page,
					index, last_index - index);
		}
		/*
		 * The page was found in the page cache.  Check the
		 * PG_uptodate flag: if it is set there is no need to read
		 * from the device.
		 */
		if (!PageUptodate(page)) {
			/*
			 * PG_uptodate is not set, so the page contents are
			 * not valid and must be read from the file system.
			 */
			if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
					!mapping->a_ops->is_partially_uptodate)
				goto page_not_up_to_date;
			if (!trylock_page(page))
				goto page_not_up_to_date;
			if (!mapping->a_ops->is_partially_uptodate(page,
								desc, offset))
				goto page_not_up_to_date_locked;
			unlock_page(page);
		}
page_ok:
		/* Size of the file behind this inode. */
		isize = i_size_read(inode);
		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
		if (unlikely(!isize || index > end_index)) {
			page_cache_release(page);
			goto out;
		}

		/* nr is the maximum number of bytes to copy from this page */
		nr = PAGE_CACHE_SIZE;
		if (index == end_index) {
			/* Last page of the file: only part of it is valid. */
			nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
			if (nr <= offset) {
				page_cache_release(page);
				goto out;
			}
		}
		nr = nr - offset;

		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		if (prev_index != index || offset != prev_offset)
			mark_page_accessed(page);
		prev_index = index;

		/*
		 * At this point the wanted page has been found in the cache.
		 * Copy from the page to user space; the actor returns the
		 * amount of data copied.
		 */
		ret = actor(desc, page, offset, nr);
		offset += ret;
		index += offset >> PAGE_CACHE_SHIFT;
		offset &= ~PAGE_CACHE_MASK;
		prev_offset = offset;

		page_cache_release(page);
		if (ret == nr && desc->count)
			continue;
		goto out;

page_not_up_to_date:
		/* Get exclusive access to the page ... */
		/* Data is about to be read into this page, so lock it. */
		error = lock_page_killable(page);
		if (unlikely(error))
			goto readpage_error;

page_not_up_to_date_locked:
		/* Did it get truncated before we got the lock? */
		/*
		 * Another process may have removed the page from the page
		 * cache while we were taking the lock.  In that case unlock
		 * the page, drop our reference and go round the loop again:
		 * the lookup will then miss and a new page gets allocated.
		 */
		if (!page->mapping) {
			unlock_page(page);
			page_cache_release(page);
			continue;
		}

		/*
		 * While we were locking, another process may already have
		 * filled the page from the file system.  If so, jump to
		 * page_ok and simply copy the contents to user space.
		 */
		/* Did somebody else fill it already? */
		if (PageUptodate(page)) {
			unlock_page(page);
			goto page_ok;
		}

readpage:
		/* Start the actual read. The read will unlock the page. */
		/* The data was not available in the cache: read it from the hardware. */
		error = mapping->a_ops->readpage(filp, page);

		if (unlikely(error)) {
			if (error == AOP_TRUNCATED_PAGE) {
				page_cache_release(page);
				goto find_page;
			}
			goto readpage_error;
		}

		/*
		 * If PG_uptodate is still not set, wait until the page is no
		 * longer locked.
		 * Note: the page stays locked the whole time the file-system
		 * contents are being read into it; it is unlocked only once
		 * the read has completed, and the waiting process is then
		 * woken up.
		 */
		if (!PageUptodate(page)) {
			error = lock_page_killable(page);
			if (unlikely(error))
				goto readpage_error;
			if (!PageUptodate(page)) {
				if (page->mapping == NULL) {
					/*
					 * invalidate_inode_pages got it
					 */
					unlock_page(page);
					page_cache_release(page);
					goto find_page;
				}
				unlock_page(page);
				shrink_readahead_size_eio(filp, ra);
				error = -EIO;
				goto readpage_error;
			}
			unlock_page(page);
		}

		goto page_ok;

readpage_error:
		/* UHHUH! A synchronous read error occurred. Report it */
		desc->error = error;
		page_cache_release(page);
		goto out;

no_cached_page:
		/*
		 * Ok, it wasn't cached, so we need to create a new
		 * page..
		 */
		/* No matching page in the page cache: allocate a new one. */
		page = page_cache_alloc_cold(mapping);
		if (!page) {
			desc->error = -ENOMEM;
			goto out;
		}
		/*
		 * Add the new page to the page cache and to the LRU.
		 * Note: inserting the new page into the page cache sets its
		 * PG_locked flag.
		 */
		error = add_to_page_cache_lru(page, mapping,
						index, GFP_KERNEL);
		if (error) {
			page_cache_release(page);
			if (error == -EEXIST)
				goto find_page;
			desc->error = error;
			goto out;
		}
		goto readpage;
	}

out:
	/* Store the last position as (page index << shift) | in-page offset. */
	ra->prev_pos = prev_index;
	ra->prev_pos <<= PAGE_CACHE_SHIFT;
	ra->prev_pos |= prev_offset;

	*ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
	file_accessed(filp);
}

/*****************************************************************************/

接下来看看mapping->a_ops->readpage(filp, page)是如何从块设备中读取数据的。
file->f_mapping是从对应inode->i_mapping而来，inode->i_mapping->a_ops是由对应的文件系统类型在生成这个inode时赋予的。

在分析块设备文件打开时对此有过分析。

/*
 * blkdev_readpage - page-read method for block devices.
 *
 * A thin wrapper: the whole job is delegated to block_read_full_page()
 * with blkdev_get_block as the block-mapping callback.  @file is unused.
 */
static int blkdev_readpage(struct file * file, struct page * page)
{
	int status;

	status = block_read_full_page(page, blkdev_get_block);
	return status;
}

/*****************************************************************************/

/*
 * block_read_full_page - read a whole page via its buffer heads (excerpt).
 *
 * Makes sure the page has buffer heads, collects buffers into arr[], locks
 * each collected buffer and marks it for asynchronous read, then starts the
 * I/O with submit_bh() for every buffer that is not already up to date.
 *
 * @page:      the page to fill; must be locked (enforced by BUG_ON).
 * @get_block: block-mapping callback; its use lies in code elided by the
 *             article.
 *
 * Returns 0 in the code shown here.
 */
int block_read_full_page(struct page *page, get_block_t *get_block)
{
	struct inode *inode = page->mapping->host;
	sector_t iblock, lblock;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	unsigned int blocksize;
	int nr, i;
	int fully_mapped = 1;

	BUG_ON(!PageLocked(page));
	blocksize = 1 << inode->i_blkbits;
	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/*
	 * The file system reads and writes data blocks on the block device.
	 * All block read/write requests are handed to the block device
	 * driver through the standard core calls in the form of
	 * buffer_head structures.
	 */
	head = page_buffers(page);

	/* First block of this page and the block count implied by i_size. */
	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
	bh = head;
	nr = 0;
	i = 0;

	do {
		/* ... per-buffer mapping checks elided in the article ... */
		arr[nr++] = bh;
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	if (fully_mapped)
		SetPageMappedToDisk(page);

	/* ... elided in the article ... */

	/* Lock every collected buffer and mark it for asynchronous read. */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		lock_buffer(bh);
		mark_buffer_async_read(bh);
	}

	/*
	 * Stage 3: start the IO. Check for uptodateness
	 * inside the buffer lock in case another process reading
	 * the underlying blockdev brought it uptodate (the sct fix).
	 */
	for (i = 0; i < nr; i++) {
		bh = arr[i];
		if (buffer_uptodate(bh))
			end_buffer_async_read(bh, 1);
		else
			/* submit_bh() goes on to call submit_bio() for this buffer. */
			submit_bh(READ, bh);
	}
	return 0;
}

/*****************************************************************************/

/*
 * submit_bh - wrap one buffer head in a bio and submit it (excerpt).
 *
 * Allocates a bio with a single io_vec, fills it in from @bh (device,
 * page, length, offset within the page), installs end_bio_bh_io_sync as
 * the completion callback and passes the bio to submit_bio().
 *
 * @rw: read/write flag, forwarded unchanged to submit_bio().
 * @bh: the buffer head describing the block to transfer.
 *
 * Returns 0, or -EOPNOTSUPP when the bio carries the BIO_EOPNOTSUPP flag
 * after submission.  The line consisting of a lone "。" marks code the
 * article left out.
 */
int submit_bh(int rw, struct buffer_head * bh)

{

struct bio *bio;

int ret = 0;

。

bio = bio_alloc(GFP_NOIO, 1);

/* Block number scaled by (b_size >> 9), i.e. the block size in 512-byte units. */
bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);

bio->bi_bdev = bh->b_bdev;

bio->bi_io_vec[0].bv_page = bh->b_page;

bio->bi_io_vec[0].bv_len = bh->b_size;

bio->bi_io_vec[0].bv_offset = bh_offset(bh);

bio->bi_vcnt = 1;

bio->bi_idx = 0;

bio->bi_size = bh->b_size;

bio->bi_end_io = end_bio_bh_io_sync;

bio->bi_private = bh;

/* bio_get()/bio_put() bracket the submission and the flag check below. */
bio_get(bio);

/* A bio has been allocated and filled in; pass the request further down. */

submit_bio(rw, bio);

if (bio_flagged(bio, BIO_EOPNOTSUPP))

ret = -EOPNOTSUPP;

bio_put(bio);

return ret;

}

/*****************************************************************************/

/*
 * submit_bio - tag a bio with its direction and queue it (excerpt).
 *
 * ORs @rw into bio->bi_rw and hands the bio to generic_make_request().
 * 'count' is computed but not used in the lines shown; the lone "。" marks
 * code the article left out.
 */
void submit_bio(int rw, struct bio *bio)

{

int count = bio_sectors(bio);

bio->bi_rw |= rw;

。

/* generic_make_request() is a wrapper that calls __generic_make_request() for each bio. */

generic_make_request(bio);

}

/*****************************************************************************/

/*
 * generic_make_request - feed a bio, and any bios queued behind it, to
 * __generic_make_request() one at a time (excerpt).
 *
 * The pending bios are kept on current->bio_list, with current->bio_tail
 * pointing at the end of that list.  Each pass detaches the head bio,
 * processes it, and then picks up whatever is on current->bio_list
 * afterwards.  bio_tail is reset to NULL when the list is drained.
 *
 * @bio: the bio to submit; must not be linked to another bio on entry
 *       (enforced by BUG_ON).
 */
void generic_make_request(struct bio *bio)
{
	/* ... elided in the article ... */

	BUG_ON(bio->bi_next);

	do {
		current->bio_list = bio->bi_next;
		if (bio->bi_next == NULL)
			/* List is now empty: the tail points back at the head slot. */
			current->bio_tail = &current->bio_list;
		else
			bio->bi_next = NULL;

		/*
		 * Look up the request queue of the disk object behind the
		 * bio's block device (bio->bi_bdev->bd_disk->queue) and call
		 * q->make_request_fn to add the bio to that queue.
		 */
		__generic_make_request(bio);

		bio = current->bio_list;
	} while (bio);

	current->bio_tail = NULL; /* deactivate */
}

/*****************************************************************************/

/*
 * __generic_make_request - hand one bio to its queue's make_request_fn
 * (heavily elided excerpt).
 *
 * The visible code calls q->make_request_fn(q, bio) repeatedly for as long
 * as it returns non-zero.  Lines consisting of a lone "。" mark code the
 * article left out; the assignment of 'q' and the declaration of 'ret' are
 * not visible here.
 */
static inline void __generic_make_request(struct bio *bio)

{

struct request_queue *q;

。

do {

。

ret = q->make_request_fn(q, bio);

} while (ret);

。

}

/*****************************************************************************/

/*
 * Now it is time to look at how a data-read request gets built.
 *
 * Every request hangs off a request queue head; that head is allocated and
 * initialised by calling blk_init_queue().
 *
 * blk_init_queue() itself is merely a wrapper around blk_init_queue_node(),
 * to which it passes -1 as the node id.
 */
struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
{
	struct request_queue *queue = blk_init_queue_node(rfn, lock, -1);

	return queue;
}

/*****************************************************************************/

/*
 * blk_init_queue_node - allocate and set up a request queue (excerpt).
 *
 * @rfn:     request-handling function, stored in q->request_fn.
 * @lock:    queue lock to use; when NULL the queue's own __queue_lock is
 *           used instead.
 * @node_id: forwarded to blk_alloc_queue_node().
 *
 * Returns the initialised queue when elevator_init() returns 0; otherwise
 * drops the queue with blk_put_queue() and returns NULL.
 */
struct request_queue *
blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
{
	/*
	 * Allocate memory for the request queue head (request_queue) and
	 * initialise some of its fields, such as the work item
	 * q->unplug_work.
	 */
	struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id);

	/* ... elided in the article ... */

	if (!lock)
		lock = &q->__queue_lock;

	/*
	 * rfn is the request-handling function.  In the MTD device driver
	 * its job is to wake the kernel thread so that the thread function
	 * performs the actual data reads and writes.
	 */
	q->request_fn = rfn;
	q->prep_rq_fn = NULL;

	/*
	 * generic_unplug_device is called by the handler of the work item
	 * q->unplug_work.  Running generic_unplug_device eventually calls
	 * q->request_fn(q), which wakes the kernel thread.
	 */
	q->unplug_fn = generic_unplug_device;
	q->queue_flags = QUEUE_FLAG_DEFAULT;
	q->queue_lock = lock;

	/*
	 * This also sets hw/phys segments, boundary and size
	 */
	/*
	 * Initialise further fields of the request queue and point
	 * q->make_request_fn at the request-building function
	 * __make_request.
	 */
	blk_queue_make_request(q, __make_request);

	q->sg_reserved_size = INT_MAX;

	blk_set_cmd_filter_defaults(&q->cmd_filter);

	/*
	 * all done
	 */
	if (!elevator_init(q, NULL)) {
		blk_queue_congestion_threshold(q);
		return q;
	}

	blk_put_queue(q);
	return NULL;
}

/*****************************************************************************/

/*
 * blk_alloc_queue_node - allocate a zeroed request queue and initialise
 * its basic members (excerpt).
 *
 * @gfp_mask: allocation flags; __GFP_ZERO is OR-ed in.
 * @node_id:  forwarded to kmem_cache_alloc_node().
 *
 * Returns the queue.  'err' is not used in the lines shown; it belongs to
 * code the article left out.
 */
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
	struct request_queue *q;
	int err;

	/* Allocate storage for the request queue. */
	q = kmem_cache_alloc_node(blk_requestq_cachep,
				gfp_mask | __GFP_ZERO, node_id);

	/* ... elided in the article ... */

	/*
	 * Initialise the work item q->unplug_work; its handler is
	 * blk_unplug_work.  That handler calls q->unplug_fn, i.e.
	 * generic_unplug_device, and generic_unplug_device in turn calls
	 * q->request_fn(q) to wake the kernel thread.
	 */
	INIT_WORK(&q->unplug_work, blk_unplug_work);

	kobject_init(&q->kobj, &blk_queue_ktype);

	mutex_init(&q->sysfs_lock);
	spin_lock_init(&q->__queue_lock);

	return q;
}

/*****************************************************************************/

/*
 * blk_queue_make_request - initialise the fields of a request queue to
 * default values and install its make_request function (excerpt).
 *
 * @q:   the request queue to set up.
 * @mfn: the function stored in q->make_request_fn.
 */
void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
{
	/* ... elided in the article ... */

	/* Point q->make_request_fn at the request-building function __make_request. */
	q->make_request_fn = mfn;

	/* ... elided in the article ... */

	/*
	 * Aside from the article (not part of this function):
	 *
	 * The timeout handler of this timer, blk_unplug_timeout, adds the
	 * work item q->unplug_work to the workqueue kblockd_workqueue:
	 *
	 *	queue_work(kblockd_workqueue, work);
	 *
	 * kblockd_workqueue is defined and created in blk-core.c, which
	 * contains this initialisation function:
	 *
	 *	int __init blk_dev_init(void)
	 *	{
	 *		// create the workqueue
	 *		kblockd_workqueue = create_workqueue("kblockd");
	 *		if (!kblockd_workqueue)
	 *			panic("Failed to create kblockd\n");
	 *
	 *		// create a memory pool; request structures get their
	 *		// memory from this pool when they are created
	 *		request_cachep = kmem_cache_create("blkdev_requests",
	 *			sizeof(struct request), 0, SLAB_PANIC, NULL);
	 *
	 *		// request queue structures get their memory from the
	 *		// pool blkdev_queue when they are created
	 *		blk_requestq_cachep = kmem_cache_create("blkdev_queue",
	 *			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
	 *
	 *		return 0;
	 *	}
	 */
	q->unplug_timer.function = blk_unplug_timeout;
	q->unplug_timer.data = (unsigned long)q;

	/*
	 * by default assume old behaviour and bounce for any highmem page
	 */
	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
}

/*****************************************************************************/

上层函数通过调用q->make_request_fn来创建请求结构，而该函数指针实际指向的是函数__make_request。

static int __make_request(struct request_queue *q, struct bio *bio)

{

struct request *req;

int el_ret, nr_sectors;

const unsigned short prio = bio_prio(bio);

const int sync = bio_sync(bio);

const int unplug = bio_unplug(bio);

int rw_flags;

。

elv_merge函数相当重要，它试图在请求队列中找到一个能够合并该bio的request，函数返回三个可能值：

ELEVATOR_NO_MERGE：队列已经存在的请求中不能包含bio结构，需

要创建一个新请求。

ELEVATOR_BACK_MERGE：bio结构可作为末尾的bio而插入到某个请

求中；

ELEVATOR_FRONT_MERGE：bio结构可作为某个请求的第一个bio

被插入；

el_ret = elv_merge(q, &req, bio);

switch (el_ret) {

case ELEVATOR_BACK_MERGE:

。

case ELEVATOR_FRONT_MERGE:

。

default:

;

}

get_rq:

rw_flags = bio_data_dir(bio);

if (sync)

rw_flags |= REQ_RW_SYNC;

//分配一个请求

req = get_request_wait(q, rw_flags, bio);

用bio初始化该请求，

并在函数init_request_from_bio->blk_rq_bio_prep 中建立如下指向关系：

if (bio->bi_bdev)

rq->rq_disk = bio->bi_bdev->bd_disk;

bio->bi_bdev在bio中的定义为struct block_device *bi_bdev;

bio->bi_bdev->bd_disk在block_device中的定义为struct gendisk * bd_disk;

rq->rq_disk在request中的定义为struct gendisk *rq_disk;

一个请求队列管理着很多请求，但是每一个请求都只能针对一个块设备gendisk。

所以每一个请求被创建出来后都会指向它的请求对象gendisk。

init_request_from_bio(req, bio);

。

I/O调度器的功能是排列请求队列中的请求，以使系统或得最好性能。在I/O调度器的使用时需要多个请求才能使其获得最好效果。为此系统提供了函数blk_plug_device()，当该函数

被调用时，请求队列被阻塞，请求暂时得不到处理。等到请求能获得一个最优排列状态时调用函数blk_remove_plug()解除阻塞，处理队列中的请求。

if (queue_should_plug(q) && elv_queue_empty(q))

blk_plug_device(q);

//将请求根据调度算法插入请求队列。

add_request(q, req);

out:

//函数__generic_unplug_device会调用请求处理函数q->request_fn(q);

//唤醒内核线程处理请求。

if (unplug || !queue_should_plug(q))

__generic_unplug_device(q);

spin_unlock_irq(q->queue_lock);

return 0;

}

阅读(2053) | 评论(0) | 转发(0) |

上一篇：MTD块设备的数据写

下一篇：MTD块设备的打开过程

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6