1、准备工作。通过VFS层,及一些初始化操作,为真正的读操作做准备。
首先是用户进程通过read系统调用发出一个读请求:
文件/fs/read_write.c
asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count)
{
struct file *file;
ssize_t ret = -EBADF;
int fput_needed;
file = fget_light(fd, &fput_needed);
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_read(file, buf, count, &pos);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
return ret;
}
然后通过VFS层操作:
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
ssize_t ret;
if (!(file->f_mode & FMODE_READ))
return -EBADF;
if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
return -EINVAL;
if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
return -EFAULT;
ret = rw_verify_area(READ, file, pos, count);
if (!ret) {
ret = security_file_permission (file, MAY_READ);
if (!ret) {
if (file->f_op->read)
ret = file->f_op->read(file, buf, count, pos);
else
ret = do_sync_read(file, buf, count, pos);
if (ret > 0) {
fsnotify_access(file->f_dentry);
current->rchar += ret;
}
current->syscr++;
}
}
return ret;
}
do_sync_read最后还是调用了aio_read()接口函数来完成读操作,即在2.6中,aio_read()为同步和异步读操作的通用接口。
ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
struct kiocb kiocb;
ssize_t ret;
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
*ppos = kiocb.ki_pos;
return ret;
}
对于ext2文件系统,有:
const struct file_operations ext2_file_operations = {
.llseek = generic_file_llseek,
.read = generic_file_read,
.write = generic_file_write,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.......
}
文件/mm/filemap.c
ssize_t
generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
struct iovec local_iov = { .iov_base = buf, .iov_len = count };
struct kiocb kiocb;
ssize_t ret;
init_sync_kiocb(&kiocb, filp);
ret = __generic_file_aio_read(&kiocb, &local_iov, 1, ppos);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
return ret;
}
ssize_t
__generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos)
{
......
do_generic_file_read(filp,ppos,&desc,file_read_actor);
......
}
文件/include/linux/fs.h
static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
read_descriptor_t * desc,
read_actor_t actor)
{
do_generic_mapping_read(filp->f_mapping,
&filp->f_ra,
filp,
ppos,
desc,
actor);
}
2、读入操作。完成了上面的准备工作,下一步就是执行读操作的核心函数do_generic_mapping_read,这是一个比较复杂的函数,里面有大量的goto跳转,但还是比较清晰的。
它工作过程可以描述如下:
a. 如果所要读取的文件在页面缓存中,则跳转到步骤d。
b. 文件还没有被缓冲,所以要从设备中去读取,首先分配一个页面,并将这个页面链入到相应的address_space中去
c. 然后调用address_space中的readpage()函数,去从设备中读出一个页面大小的数据到这个页面缓存中。
d. 检查PageUptodate(page)
e. 调用由参数传入的actor函数指针,在此为file_read_actor(),将数据中页面缓存中拷贝到用户缓冲区。
f. 如果请求读取的数据长度已完成,则函数返回,否则跳转到步骤a重复执行。
先看看file_read_actor():
int file_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset, unsigned long size)
{
char *kaddr;
unsigned long left, count = desc->count;
if (size > count)
size = count;
……
/* Do it the slow way */
kaddr = kmap(page);
left = __copy_to_user(desc->arg.buf, kaddr + offset, size); //将数据拷贝到用户空间
kunmap(page);
if (left) {
size -= left;
desc->error = -EFAULT;
}
success:
desc->count = count - size;
desc->written += size;
desc->arg.buf += size;
return size;
}
文件/mm/filemap.c
/**
* This is a generic file read routine, and uses the
* mapping->a_ops->readpage() function for the actual low-level stuff.
*/
void do_generic_mapping_read(struct address_space *mapping,
struct file_ra_state *_ra,
struct file *filp,
loff_t *ppos,
read_descriptor_t *desc,
read_actor_t actor)
{
struct inode *inode = mapping->host;
unsigned long index;
unsigned long end_index;
unsigned long offset;
unsigned long last_index;
unsigned long next_index;
unsigned long prev_index;
loff_t isize;
struct page *cached_page;
int error;
struct file_ra_state ra = *_ra;
cached_page = NULL;
index = *ppos >> PAGE_CACHE_SHIFT;
next_index = index;
prev_index = ra.prev_page;
last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
offset = *ppos & ~PAGE_CACHE_MASK;
isize = i_size_read(inode);
if (!isize)
goto out;
end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
for (;;) {
struct page *page;
unsigned long nr, ret;
/* nr is the maximum number of bytes to copy from this page */
nr = PAGE_CACHE_SIZE;
if (index >= end_index) {
if (index > end_index)
goto out;
nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
if (nr <= offset) {
goto out;
}
}
nr = nr - offset;
cond_resched();
if (index == next_index)
next_index = page_cache_readahead(mapping, &ra, filp, index, last_index - index);
find_page:
page = find_get_page(mapping, index); //在缓存中查找
if (unlikely(page == NULL)) {
handle_ra_miss(mapping, &ra, index);
goto no_cached_page; //没有找到
}
if (!PageUptodate(page)) //Uptodate
goto page_not_up_to_date;
page_ok: //找到了相关缓存页面
ret = actor(desc, page, offset, nr); //拷贝数据到用户缓冲区
//更新一些变量值
offset += ret;
index += offset >> PAGE_CACHE_SHIFT;
offset &= ~PAGE_CACHE_MASK;
page_cache_release(page);
if (ret == nr && desc->count)
continue; //未完成,进入下一次循环
goto out; //完成
page_not_up_to_date:
/* Get exclusive access to the page ... */
lock_page(page);
/* Did it get truncated before we got the lock? */
if (!page->mapping) {
unlock_page(page);
page_cache_release(page);
continue;
}
/* Did somebody else fill it already? */
if (PageUptodate(page)) {
unlock_page(page);
goto page_ok;
}
readpage: //读操作
/* Start the actual read. The read will unlock the page. */
error = mapping->a_ops->readpage(filp, page); //真正的读操作
……
/* nr is the maximum number of bytes to copy from this page */
nr = PAGE_CACHE_SIZE;
if (index == end_index) {
nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
if (nr <= offset) {
page_cache_release(page);
goto out;
}
}
nr = nr - offset;
goto page_ok;
readpage_error:
/* UHHUH! A synchronous read error occurred. Report it */
desc->error = error;
page_cache_release(page);
goto out;
no_cached_page: //分配一个新的页面,比将它链入缓存树中。
/*
* Ok, it wasn't cached, so we need to create a new
* page..
*/
if (!cached_page) {
cached_page = page_cache_alloc_cold(mapping);
if (!cached_page) {
desc->error = -ENOMEM;
goto out;
}
}
error = add_to_page_cache_lru(cached_page, mapping, index, GFP_KERNEL);
page = cached_page;
cached_page = NULL;
goto readpage;
}
out:
*_ra = ra;
*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
if (cached_page)
page_cache_release(cached_page);
if (filp)
file_accessed(filp);
}
3、从设备读取
对于不同的文件系统有不同的address_space,而且有不同的address_space_operations,对于ext2文件系统来说,这个是如下一个结构:
struct address_space_operations ext2_aops = {
.readpage = ext2_readpage,
.readpages = ext2_readpages,
.writepage = ext2_writepage,
.sync_page = block_sync_page,
.prepare_write = ext2_prepare_write,
.commit_write = generic_commit_write,
.bmap = ext2_bmap,
.direct_IO = ext2_direct_IO,
.writepages = ext2_writepages,
};
可见,这个readpage()便是ext2_readpage(),它负责从设备中读取一个页面。
static int ext2_readpage(struct file *file, struct page *page)
{
return mpage_readpage(page, ext2_get_block);
}
/*
* This isn't called much at all
*/
int mpage_readpage(struct page *page, get_block_t get_block)
{
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
struct buffer_head map_bh;
unsigned long first_logical_block = 0;
clear_buffer_mapped(&map_bh);
bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,&map_bh, &first_logical_block, get_block);
if (bio)
mpage_bio_submit(READ, bio);
return 0;
}
这个函数最终将读请求转成submit_bio(),之后就是通用块层的事情了。
阅读(2035) | 评论(0) | 转发(0) |