Chinaunix首页 | 论坛 | 博客
  • 博客访问: 293348
  • 博文数量: 34
  • 博客积分: 1400
  • 博客等级: 上尉
  • 技术积分: 433
  • 用 户 组: 普通用户
  • 注册时间: 2008-07-23 13:16
文章分类

全部博文(34)

文章存档

2011年(3)

2010年(4)

2009年(6)

2008年(21)

我的朋友

分类: LINUX

2008-07-23 15:45:08

1、准备工作。通过VFS层,及一些初始化操作,为真正的读操作做准备。

首先是用户进程通过read系统调用发出一个读请求:

文件/fs/read_write.c
/*
 * read(2) entry point: look up the struct file for fd, read from the file's
 * current position through vfs_read(), then store the updated position back.
 * Returns vfs_read()'s result, or -EBADF when fd does not resolve to a file.
 */
asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count)
{
    int fput_needed;
    struct file *file = fget_light(fd, &fput_needed);
    loff_t pos;
    ssize_t ret;

    /* No file behind this descriptor: nothing to read, nothing to release. */
    if (!file)
        return -EBADF;

    /* Work on a private copy of the position and write it back afterwards. */
    pos = file_pos_read(file);
    ret = vfs_read(file, buf, count, &pos);
    file_pos_write(file, pos);

    fput_light(file, fput_needed);
    return ret;
}

然后通过VFS层操作:
/*
 * VFS-level read: validate the request, then dispatch to the file's ->read
 * method, or to do_sync_read() when only ->aio_read is provided.
 *
 * Returns the value of the read method, or a negative error:
 *   -EBADF   file not opened for reading
 *   -EINVAL  no f_op, or neither ->read nor ->aio_read is provided
 *   -EFAULT  access_ok() rejected the user buffer
 *   or whatever rw_verify_area() / security_file_permission() returned.
 */
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
    ssize_t ret;

    if (!(file->f_mode & FMODE_READ))
        return -EBADF;
    if (!file->f_op || !(file->f_op->read || file->f_op->aio_read))
        return -EINVAL;
    /* A read stores into the user buffer, hence VERIFY_WRITE. */
    if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
        return -EFAULT;

    ret = rw_verify_area(READ, file, pos, count);
    if (ret)
        return ret;

    ret = security_file_permission (file, MAY_READ);
    if (ret)
        return ret;

    /* Prefer the synchronous method; otherwise go through the aio wrapper. */
    if (file->f_op->read)
        ret = file->f_op->read(file, buf, count, pos);
    else
        ret = do_sync_read(file, buf, count, pos);

    /* Notification and byte accounting only when data was actually read. */
    if (ret > 0) {
        fsnotify_access(file->f_dentry);
        current->rchar += ret;
    }
    /* The call counter is bumped whether or not the read succeeded. */
    current->syscr++;

    return ret;
}

do_sync_read最后还是调用了aio_read()接口函数来完成读操作,即在2.6中,aio_read()为同步和异步读操作的通用接口。
/*
 * Synchronous read implemented on top of ->aio_read: set up an on-stack
 * kiocb starting at *ppos, issue the aio read, and wait for completion if
 * the request was queued.  *ppos is updated from the kiocb's final position.
 */
ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
    struct kiocb iocb;
    ssize_t nread;

    init_sync_kiocb(&iocb, filp);
    iocb.ki_pos = *ppos;

    nread = filp->f_op->aio_read(&iocb, buf, len, iocb.ki_pos);
    /* Queued rather than completed: block until the request finishes. */
    if (nread == -EIOCBQUEUED)
        nread = wait_on_sync_kiocb(&iocb);

    *ppos = iocb.ki_pos;
    return nread;
}

对于ext2文件系统,有:
/*
 * File operations used for ext2 files.  Both the synchronous (.read/.write)
 * and the aio (.aio_read/.aio_write) entry points are wired to the generic
 * page-cache based implementations.
 */
const struct file_operations ext2_file_operations = {
       .llseek   = generic_file_llseek,
       .read     = generic_file_read,
       .write    = generic_file_write,
       .aio_read = generic_file_aio_read,
       .aio_write = generic_file_aio_write,
       /* ....... (remaining operations are elided in this excerpt) */
};

文件/mm/filemap.c
/*
 * Synchronous ->read built on __generic_file_aio_read(): wrap the user
 * buffer in a single-element iovec, issue the read with an on-stack kiocb,
 * and wait for completion if the request was queued.
 */
ssize_t
generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
    struct iovec local_iov;
    struct kiocb iocb;
    ssize_t nread;

    /* One segment covering the caller's whole buffer. */
    local_iov.iov_base = buf;
    local_iov.iov_len = count;

    init_sync_kiocb(&iocb, filp);
    nread = __generic_file_aio_read(&iocb, &local_iov, 1, ppos);
    if (nread == -EIOCBQUEUED)
        nread = wait_on_sync_kiocb(&iocb);

    return nread;
}

/*
 * Abridged excerpt: only the hand-off to do_generic_file_read() is shown.
 * Everything else is replaced by the "......" markers; filp and desc are not
 * declared in the visible code, so presumably they are set up in the elided
 * part.
 */
ssize_t
__generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
        unsigned long nr_segs, loff_t *ppos)
{
    ......
    /* Perform the read, with file_read_actor as the actor callback. */
    do_generic_file_read(filp,ppos,&desc,file_read_actor);
    ......
}

文件/include/linux/fs.h
/*
 * Convenience wrapper: read from the file's own mapping, using the file's
 * own readahead state, by forwarding to do_generic_mapping_read().
 */
static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
                    read_descriptor_t * desc,
                    read_actor_t actor)
{
    struct address_space *mapping = filp->f_mapping;
    struct file_ra_state *ra = &filp->f_ra;

    do_generic_mapping_read(mapping, ra, filp, ppos, desc, actor);
}


2、读入操作。完成了上面的准备工作,下一步就是执行读操作的核心函数do_generic_mapping_read,这是一个比较复杂的函数,里面有大量的goto跳转,但还是比较清晰的。

它的工作过程可以描述如下:

a. 如果所要读取的数据所在的页面已经在页面缓存中,则跳转到步骤d。

b. 文件还没有被缓冲,所以要从设备中去读取,首先分配一个页面,并将这个页面链入到相应的address_space中去

c. 然后调用address_space中的readpage()函数,去从设备中读出一个页面大小的数据到这个页面缓存中。

d. 检查PageUptodate(page),即页面中的数据是否已是最新;如果不是,则先锁住页面再检查一次,仍不是最新时回到步骤c,调用readpage()重新读取。

e. 调用由参数传入的actor函数指针,在此为file_read_actor(),将数据从页面缓存中拷贝到用户缓冲区。

f. 如果请求读取的数据长度已完成,则函数返回,否则跳转到步骤a重复执行。

先看看file_read_actor():

/*
 * Read actor: copy up to `size` bytes from `page`, starting at `offset`,
 * into the user buffer tracked by `desc`, then advance the descriptor.
 *
 * `size` is first clamped to desc->count.  Returns the number of bytes
 * actually copied; when the copy falls short, desc->error is set to -EFAULT.
 *
 * NOTE: abridged excerpt - the "……" line stands for code that is not shown.
 * No visible statement jumps to the `success` label, so presumably the
 * elided part is a fast path that does; confirm against the full source.
 */
int file_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset, unsigned long size)

{

       char *kaddr;
       unsigned long left, count = desc->count;

       /* Never copy more than the caller still wants. */
       if (size > count)
              size = count;
       ……

       /* Do it the slow way */
       kaddr = kmap(page);
       left = __copy_to_user(desc->arg.buf, kaddr + offset, size); // copy the data to user space
       kunmap(page);

       /* `left` bytes were not copied: report only what was, and flag the fault. */
       if (left) {
              size -= left;
              desc->error = -EFAULT;

       }

success:
       /* Account for the bytes consumed and move the user pointer forward. */
       desc->count = count - size;
       desc->written += size;
       desc->arg.buf += size;
       return size;

}

文件/mm/filemap.c
/**
* This is a generic file read routine, and uses the
* mapping->a_ops->readpage() function for the actual low-level stuff.
*/
void do_generic_mapping_read(struct address_space *mapping,
                          struct file_ra_state *_ra,
                          struct file *filp,
                          loff_t *ppos,
                          read_descriptor_t *desc,
                          read_actor_t actor)
{
       struct inode *inode = mapping->host;
       unsigned long index;
       unsigned long end_index;
       unsigned long offset;
       unsigned long last_index;
       unsigned long next_index;
       unsigned long prev_index;
       loff_t isize;
       struct page *cached_page;
       int error;
       struct file_ra_state ra = *_ra;

       cached_page = NULL;
       index = *ppos >> PAGE_CACHE_SHIFT;
       next_index = index;
       prev_index = ra.prev_page;
       last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
       offset = *ppos & ~PAGE_CACHE_MASK;

       isize = i_size_read(inode);
       if (!isize)
              goto out;

       end_index = (isize - 1) >> PAGE_CACHE_SHIFT;

       for (;;) {
              struct page *page;
              unsigned long nr, ret;

              /* nr is the maximum number of bytes to copy from this page */
              nr = PAGE_CACHE_SIZE;
              if (index >= end_index) {
                     if (index > end_index)
                            goto out;

                     nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
                     if (nr <= offset) {
                            goto out;
                     }
              }
              nr = nr - offset;
              cond_resched();
              if (index == next_index)
                     next_index = page_cache_readahead(mapping, &ra, filp, index, last_index - index);

find_page:
              page = find_get_page(mapping, index); //在缓存中查找
              if (unlikely(page == NULL)) {
                     handle_ra_miss(mapping, &ra, index);
                     goto no_cached_page; //没有找到
              }
              if (!PageUptodate(page)) //Uptodate
                     goto page_not_up_to_date;

page_ok: //找到了相关缓存页面
              ret = actor(desc, page, offset, nr); //拷贝数据到用户缓冲区
              //更新一些变量值
              offset += ret;
              index += offset >> PAGE_CACHE_SHIFT;
              offset &= ~PAGE_CACHE_MASK;
              page_cache_release(page);
              if (ret == nr && desc->count)
                     continue; //未完成,进入下一次循环
              goto out; //完成

page_not_up_to_date:
              /* Get exclusive access to the page ... */
              lock_page(page);
              /* Did it get truncated before we got the lock? */
              if (!page->mapping) {
                     unlock_page(page);
                     page_cache_release(page);
                     continue;
              }

              /* Did somebody else fill it already? */
              if (PageUptodate(page)) {
                     unlock_page(page);
                     goto page_ok;
              }

readpage: //读操作

              /* Start the actual read. The read will unlock the page. */
              error = mapping->a_ops->readpage(filp, page); //真正的读操作
              ……             
              /* nr is the maximum number of bytes to copy from this page */
              nr = PAGE_CACHE_SIZE;
              if (index == end_index) {
                     nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
                     if (nr <= offset) {
                            page_cache_release(page);
                            goto out;
                     }
              }
              nr = nr - offset;
              goto page_ok;

readpage_error:
              /* UHHUH! A synchronous read error occurred. Report it */
              desc->error = error;
              page_cache_release(page);
              goto out;

no_cached_page: //分配一个新的页面,比将它链入缓存树中。
              /*
              * Ok, it wasn't cached, so we need to create a new
              * page..
              */

              if (!cached_page) {
                     cached_page = page_cache_alloc_cold(mapping);
                     if (!cached_page) {
                            desc->error = -ENOMEM;
                            goto out;
                     }
              }
              error = add_to_page_cache_lru(cached_page, mapping, index, GFP_KERNEL);
              page = cached_page;
              cached_page = NULL;
              goto readpage;
       }

out:
       *_ra = ra;
       *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
       if (cached_page)
              page_cache_release(cached_page);
       if (filp)
              file_accessed(filp);
}

3、从设备读取

对于不同的文件系统有不同的address_space,而且有不同的address_space_operations,对于ext2文件系统来说,这个是如下一个结构:

struct address_space_operations ext2_aops = {
    .readpage        = ext2_readpage,
    .readpages       = ext2_readpages,
    .writepage       = ext2_writepage,
    .sync_page       = block_sync_page,
    .prepare_write   = ext2_prepare_write,
    .commit_write    = generic_commit_write,
    .bmap            = ext2_bmap,
    .direct_IO       = ext2_direct_IO,
    .writepages      = ext2_writepages,
};

可见,这个readpage()便是ext2_readpage(),它负责从设备中读取一个页面。

/*
 * ->readpage for ext2: delegate to mpage_readpage(), passing ext2_get_block
 * as the get_block callback.  The `file` argument is not used.
 */
static int ext2_readpage(struct file *file, struct page *page)
{
       int err;

       err = mpage_readpage(page, ext2_get_block);
       return err;
}

/*
 * Read one page through do_mpage_readpage() and submit whatever bio it
 * hands back.  Always returns 0.
 *
 * This isn't called much at all
 */
int mpage_readpage(struct page *page, get_block_t get_block)
{
       unsigned long first_logical_block = 0;
       sector_t last_block_in_bio = 0;
       struct buffer_head map_bh;
       struct bio *bio;

       /* Start with the on-stack buffer_head marked as not mapped. */
       clear_buffer_mapped(&map_bh);

       /* No bio exists yet, so pass NULL as the one to continue from. */
       bio = do_mpage_readpage(NULL, page, 1, &last_block_in_bio,&map_bh, &first_logical_block, get_block);
       if (bio != NULL)
              mpage_bio_submit(READ, bio);

       return 0;
}

这个函数最终将读请求转成submit_bio(),之后就是通用块层的事情了。

阅读(2035) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~