3. Writing Data

Like data reads, data writes split into a synchronous path, do_sync_write(), and an asynchronous path, generic_file_aio_write_nolock(). The difference is that a synchronous write does not return until the data has been written, whereas an asynchronous write may return before the data has actually reached the destination storage.
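For reference, do_sync_write() in kernels of this era looks roughly like the sketch below (simplified from memory, omitting the -EIOCBRETRY retry loop): it packs the user buffer into a kiocb/iovec pair, calls the file's aio_write method (which for most filesystems ends up in the generic routine listed next), and, if the operation was queued, waits for it to complete before returning, so the caller only sees a finished write.

ssize_t do_sync_write(struct file *filp, const char __user *buf,
		      size_t len, loff_t *ppos)
{
	/* Wrap the single user buffer in an iovec/kiocb pair. */
	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	kiocb.ki_left = len;

	/* Hand the write to the asynchronous entry point... */
	ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
	/* ...and, if it was queued, wait here until it completes. */
	if (ret == -EIOCBQUEUED)
		ret = wait_on_sync_kiocb(&kiocb);
	*ppos = kiocb.ki_pos;
	return ret;
}

The asynchronous entry point itself, generic_file_aio_write_nolock(), follows: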
ssize_t generic_file_aio_write_nolock(struct kiocb *iocb,
const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
BUG_ON(iocb->ki_pos != pos);
//Write the data into the page cache
ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs,
&iocb->ki_pos);
//If the file was opened with O_SYNC (or the inode is marked synchronous), flush the cached data to disk.
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
ssize_t err;
err = sync_page_range_nolock(inode, mapping, pos, ret);
if (err < 0)
ret = err;
}
return ret;
}
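From user space, the O_SYNC branch above is exercised simply by opening the file with O_SYNC. A minimal illustration (the path /tmp/sync_demo and the data are made-up example values):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/sync_demo", O_WRONLY | O_CREAT | O_SYNC, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Because of O_SYNC, write() only returns after the page-cache
	 * pages covering this range have been pushed to disk. */
	if (write(fd, "hello\n", 6) != 6)
		perror("write");
	close(fd);
	return 0;
}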
/*****************************************************************************/
__generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos)
{
struct file *file = iocb->ki_filp;
struct address_space * mapping = file->f_mapping;
size_t ocount; /* original count */
size_t count; /* after file limit checks */
struct inode *inode = mapping->host;
loff_t pos;
ssize_t written;
ssize_t err;
ocount = 0;
//Validate the user-supplied memory areas (iovec segments)
err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
if (err)
return err;
//count: total number of bytes to be written
count = ocount;
pos = *ppos; //ppos: the current file position
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
written = 0;
//Detailed validity checks on the write operation
err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
if (err)
goto out;
if (count == 0)
goto out;
err = file_remove_suid(file);
if (err)
goto out;
//Update the inode's timestamps
file_update_time(file);
/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
/*
If the file was opened with the O_DIRECT flag, the page cache is bypassed and the data is written straight through to the filesystem. In the ordinary case we fall through to generic_file_buffered_write():
*/
if (unlikely(file->f_flags & O_DIRECT)) {
...
} else {
/*
* We don't know how much we wrote, so just return
* the number of bytes which were direct-written
*/
}
} else {
/*
generic_file_buffered_write() calls generic_perform_write() to do the write. After the write completes, if the file was opened with O_SYNC, generic_osync_inode() is called to sync the data just written into the page cache out to disk (both the inode metadata and the file contents).
*/
written = generic_file_buffered_write(iocb, iov, nr_segs,
pos, ppos, count, written);
}
out:
current->backing_dev_info = NULL;
return written ? written : err;
}
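As an aside, generic_segment_checks(), referenced in the first comment above, walks the user-supplied iovec array, verifies that every segment points into accessible user memory, and accumulates the total byte count into ocount. A simplified sketch of the idea (not the verbatim kernel code, which additionally truncates the request at the first bad segment rather than failing outright):

/* Simplified illustration of the iovec validation; not the exact kernel code. */
int generic_segment_checks_sketch(const struct iovec *iov,
				  unsigned long *nr_segs, size_t *count,
				  int access_flags)
{
	size_t cnt = 0;
	unsigned long seg;

	for (seg = 0; seg < *nr_segs; seg++) {
		const struct iovec *iv = &iov[seg];

		/* Reject negative lengths and overflow of the running total. */
		cnt += iv->iov_len;
		if (unlikely((ssize_t)(cnt | iv->iov_len) < 0))
			return -EINVAL;
		/* Each segment must point into valid user memory. */
		if (!access_ok(access_flags, iv->iov_base, iv->iov_len))
			return -EFAULT;
	}
	*count = cnt;
	return 0;
}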
/*****************************************************************************/
generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos, loff_t *ppos,
size_t count, ssize_t written)
{
...
//Copy the data into the page cache.
status = generic_perform_write(file, &i, pos);
if (likely(status >= 0)) {
written += status;
*ppos = pos + status;
//If the file was opened with O_SYNC (or the inode is sync), write the cached data synchronously to disk.
if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
if (!a_ops->writepage || !is_sync_kiocb(iocb))
status = generic_osync_inode(inode, mapping,
OSYNC_METADATA|OSYNC_DATA);
}
}
/*
* If we get here for O_DIRECT writes then we must have fallen through
* to buffered writes (block instantiation inside i_size). So we sync
* the file data here, to try to honour O_DIRECT expectations.
*/
if (unlikely(file->f_flags & O_DIRECT) && written)
status = filemap_write_and_wait_range(mapping,
pos, pos + written - 1);
return written ? written : status;
}
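One detail worth noting: the iov_iter `i` passed to generic_perform_write() above is built from the caller's iovec array in the (elided) head of generic_file_buffered_write(). A hedged sketch of that setup, assuming the iov_iter_init() signature used by kernels of this era:

	struct iov_iter i;

	/* Wrap the user iovecs for the copy helpers; 'written' accounts for
	 * any bytes already consumed by an earlier O_DIRECT attempt. */
	iov_iter_init(&i, iov, nr_segs, count, written);
	status = generic_perform_write(file, &i, pos);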
/*****************************************************************************/
static ssize_t generic_perform_write(struct file *file,
struct iov_iter *i, loff_t pos)
{
...
do {
。
。
。
//offset: offset within the page
offset = (pos & (PAGE_CACHE_SIZE - 1));
//index: page-cache index of the page containing the current position
index = pos >> PAGE_CACHE_SHIFT;
//bytes: number of bytes to copy into this page (space left in the page, capped by the data remaining)
bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
iov_iter_count(i));
again:
...
/*
write_begin calls grab_cache_page_write_begin() to look up the page to be written in the radix tree, creating it if it does not yet exist. It then calls __block_prepare_write() to set up a set of buffer_head structures for the page, describing the blocks that make it up (the information in them is later used to build the corresponding bio structures).
*/
status = a_ops->write_begin(file, mapping, pos, bytes, flags,
&page, &fsdata);
if (unlikely(status))
break;
pagefault_disable();
/*
file->f_mapping->a_ops->write_begin has prepared the page-cache pages that are about to be written; the call below copies the data to be written into them.
*/
copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
pagefault_enable();
flush_dcache_page(page);
mark_page_accessed(page);
/*
write_end calls __block_commit_write() to set the dirty flag on every buffer_head in the page. At this point the write() call is about to return. If the file was opened with O_SYNC, sync_page_range() or generic_osync_inode() will be called; otherwise write() simply finishes, and the pdflush kernel threads will later find the dirty pages in the radix tree and eventually call do_writepages() to write them back.
*/
status = a_ops->write_end(file, mapping, pos, bytes, copied,
page, fsdata);
...
} while (iov_iter_count(i));
return written ? written : status;
}
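To make the offset/index/bytes arithmetic above concrete, here is a small stand-alone userspace program that replays it for made-up values (pos = 5000, 6000 bytes to write), assuming a 4096-byte page:

#include <stdio.h>

#define PAGE_CACHE_SHIFT 12
#define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)

int main(void)
{
	unsigned long long pos = 5000;   /* hypothetical file position */
	unsigned long remaining = 6000;  /* hypothetical bytes left to write */

	while (remaining) {
		unsigned long offset = pos & (PAGE_CACHE_SIZE - 1);
		unsigned long long index = pos >> PAGE_CACHE_SHIFT;
		unsigned long bytes = PAGE_CACHE_SIZE - offset;

		if (bytes > remaining)
			bytes = remaining;

		/* First pass prints: index=1 offset=904 bytes=3192,
		 * second pass: index=2 offset=0 bytes=2808. */
		printf("index=%llu offset=%lu bytes=%lu\n", index, offset, bytes);

		pos += bytes;
		remaining -= bytes;
	}
	return 0;
}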
/*****************************************************************************/
To write the data that has been placed in the page cache out to disk, blkdev_writepage() is called.
//This function is just a wrapper around block_write_full_page().
static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
return block_write_full_page(page, blkdev_get_block, wbc);
}
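blkdev_get_block(), the get_block_t callback passed to block_write_full_page() here, is trivial for a raw block device, since logical block N of the device "file" is simply block N of the device. Roughly (a sketch of this era's helper, not a verbatim copy):

static int blkdev_get_block(struct inode *inode, sector_t iblock,
			    struct buffer_head *bh, int create)
{
	/* For a block device the logical block maps 1:1 onto the device. */
	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}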
/*****************************************************************************/
//block_write_full_page() is in turn just a wrapper around
//block_write_full_page_endio().
int block_write_full_page(struct page *page, get_block_t *get_block,
struct writeback_control *wbc)
{
return block_write_full_page_endio(page, get_block, wbc,
end_buffer_async_write);
}
/*****************************************************************************/
int block_write_full_page_endio(struct page *page, get_block_t *get_block,
struct writeback_control *wbc, bh_end_io_t *handler)
{
struct inode * const inode = page->mapping->host;
loff_t i_size = i_size_read(inode);
const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
unsigned offset;
/* Is the page fully inside i_size? */
//If the whole page lies within i_size (i.e. the file data extends past the
//end of this page), call __block_write_full_page() directly.
if (page->index < end_index)
return __block_write_full_page(inode, page, get_block, wbc,
handler);
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_CACHE_SIZE-1);
//The page lies entirely beyond i_size (e.g. a truncate is in progress): invalidate it and return.
if (page->index >= end_index+1 || !offset) {
/*
* The page may have dirty, unmapped buffers. For example,
* they may have been added in ext3_writepage(). Make them
* freeable here, so the page does not leak.
*/
do_invalidatepage(page, 0);
unlock_page(page);
return 0; /* don't care */
}
//i_size falls within this page (the file ends part-way through it): zero the
//part of the page beyond i_size, then write the page out.
zero_user_segment(page, offset, PAGE_CACHE_SIZE);
return __block_write_full_page(inode, page, get_block, wbc, handler);
}
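A quick worked example, assuming PAGE_CACHE_SIZE is 4096: with i_size = 10000, end_index = 10000 >> 12 = 2 and offset = 10000 & 4095 = 1808. Pages 0 and 1 lie entirely below i_size and are written out directly; the page with index 2 straddles EOF, so bytes 1808 through 4095 of it are zeroed before writeback; pages with index 3 or higher (or the page at index 2 itself, had i_size been exactly page-aligned so that offset == 0) are only invalidated.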
/*****************************************************************************/
static int __block_write_full_page(struct inode *inode, struct page *page,
get_block_t *get_block, struct writeback_control *wbc,
bh_end_io_t *handler)
{
...
//Get the buffer_head structures that manage this page's cached blocks.
head = page_buffers(page);
bh = head;
do {
...
bh = bh->b_this_page;
block++;
} while (bh != head);
do {
...
} while ((bh = bh->b_this_page) != head);
BUG_ON(PageWriteback(page));
set_page_writeback(page);
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
//Allocate a bio for this buffer and call generic_make_request()
//to submit it to the request queue maintained by the disk driver;
submit_bh(write_op, bh);
nr_underway++;
}
bh = next;
} while (bh != head);
unlock_page(page);
err = 0;
done:
...
recover:
...
goto done;
}
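To close the loop on the last comment: submit_bh() turns one buffer_head into a single-segment bio and hands it to the block layer. A condensed, hedged sketch of that conversion (barrier handling, sanity checks and error propagation omitted):

int submit_bh_sketch(int rw, struct buffer_head *bh)
{
	/* One buffer_head becomes a bio with a single bio_vec. */
	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); /* 512-byte sectors */
	bio->bi_bdev   = bh->b_bdev;
	bio->bi_io_vec[0].bv_page   = bh->b_page;
	bio->bi_io_vec[0].bv_len    = bh->b_size;
	bio->bi_io_vec[0].bv_offset = bh_offset(bh);
	bio->bi_vcnt = 1;
	bio->bi_size = bh->b_size;

	bio->bi_end_io  = end_bio_bh_io_sync;   /* completes the buffer_head */
	bio->bi_private = bh;

	/* submit_bio() ends up in generic_make_request(), which queues the
	 * bio on the request queue of the underlying disk driver. */
	submit_bio(rw, bio);
	return 0;
}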