An analysis of generic_commit_write (original post by zixin)
int generic_commit_write(struct file *file, struct page *page,
unsigned from, unsigned to)
{
struct inode *inode = page->mapping->host;
/*
 * compute the byte offset in the file of the end of this write
 */
loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
__block_commit_write(inode,page,from,to);
/* release the kernel mapping of the (possibly high-memory) page */
kunmap(page);
/*
 * check whether the write extended the file:
 * if the end offset exceeds the current file size, update
 * inode->i_size and mark the inode dirty
 */
if (pos > inode->i_size) {
inode->i_size = pos;
mark_inode_dirty(inode);
}
return 0;
}
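To make the offset arithmetic concrete, here is a minimal user-space sketch of the pos computation (assuming 4 KiB page-cache pages, i.e. PAGE_CACHE_SHIFT = 12):
#include <stdio.h>

#define PAGE_CACHE_SHIFT 12 /* assumption: 4 KiB page cache pages */

int main(void)
{
    unsigned long index = 3; /* page->index: the fourth page of the file */
    unsigned to = 100;       /* the write ends 100 bytes into the page */

    /* same arithmetic as generic_commit_write() */
    long long pos = ((long long)index << PAGE_CACHE_SHIFT) + to;

    printf("pos = %lld\n", pos); /* 3 * 4096 + 100 = 12388 */
    return 0;
}
So a write ending at byte 100 of page 3 yields pos = 12388; if i_size was smaller than that, the file has grown and i_size is bumped.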
static int __block_commit_write(struct inode *inode, struct page *page,
unsigned from, unsigned to)
{
unsigned block_start, block_end;
int partial = 0, need_balance_dirty = 0;
unsigned blocksize;
struct buffer_head *bh, *head;
/* block size in bytes */
blocksize = 1 << inode->i_blkbits;
/*
 * walk every buffer_head attached to the page;
 * block_start is the byte offset of the current block within the page
 */
for(bh = head = page->buffers, block_start = 0;
bh != head || !block_start;
block_start=block_end, bh = bh->b_this_page) {
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
partial = 1;
} else {
/*
 * This buffer intersects the written range [from, to):
 * set BH_Uptodate, i.e. the buffer now holds valid data (its contents
 * have not reached the device yet, but they will shortly).
 * atomic_set_buffer_dirty(bh) sets BH_Dirty and atomically returns the
 * flag's previous value:
 * - if the buffer was already dirty, nothing more needs to be done;
 * - if it just went from clean to dirty:
 *   __mark_dirty() moves the buffer to the list matching its BH_Dirty
 *   and BH_Lock flags (here, onto the BUF_DIRTY list);
 *   buffer_insert_inode_data_queue() puts it on the inode's dirty list
 *   for file data;
 *   need_balance_dirty = 1.
 */
set_bit(BH_Uptodate, &bh->b_state);
if (!atomic_set_buffer_dirty(bh)) {
__mark_dirty(bh);
buffer_insert_inode_data_queue(bh, inode);
need_balance_dirty = 1;
}
}
}
/* If any buffer went from clean to dirty, call balance_dirty() to
 * check whether enough dirty pages have accumulated. The decision is
 * made by the helper function balance_dirty_state():
 * returns -1: not enough dirty pages, do not wake bdflush
 * returns  0: enough dirty pages, wake bdflush to flush asynchronously
 * returns  1: too many dirty pages, wake bdflush and make the current
 *             process block while flushing
 */
if (need_balance_dirty)
balance_dirty();
/*
 * If this is a partial write that happened to make all buffers
 * uptodate then we can optimize away a bogus readpage() for
 * the next read(). Here we 'discover' whether the page went
 * uptodate as a result of this (potentially partial) write.
 */
if (!partial)
SetPageUptodate(page);
return 0;
}
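The geometry of the loop above is easier to see with numbers. A minimal user-space sketch, assuming 1 KiB blocks in a 4 KiB page and an in-page write range of [1500, 2600):
#include <stdio.h>

#define PAGE_SIZE 4096
#define BLOCKSIZE 1024 /* assumption: 1 KiB blocks, four buffers per page */

int main(void)
{
    unsigned from = 1500, to = 2600; /* bytes written within the page */
    unsigned block_start, block_end;

    /* same intersection test as __block_commit_write() */
    for (block_start = 0; block_start < PAGE_SIZE; block_start += BLOCKSIZE) {
        block_end = block_start + BLOCKSIZE;
        if (block_end <= from || block_start >= to)
            printf("buffer [%4u,%4u): untouched, only checked for uptodate\n",
                   block_start, block_end);
        else
            printf("buffer [%4u,%4u): intersects, marked uptodate + dirty\n",
                   block_start, block_end);
    }
    return 0;
}
Buffers [1024,2048) and [2048,3072) intersect the write and are dirtied; [0,1024) and [3072,4096) are untouched, and if either of those is not uptodate then partial stays 1 and the page cannot be marked PageUptodate.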
/*
 * Update bh->b_flushtime to set the deadline by which this dirty
 * buffer should be written back to disk, then call refile_buffer() to
 * move the bh onto the appropriate lru_list (here, the BUF_DIRTY list).
 */
inline void __mark_dirty(struct buffer_head *bh)
{
bh->b_flushtime = jiffies + bdf_prm.b_un.age_buffer;
refile_buffer(bh);
}
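A quick worked example of the deadline (assuming HZ = 100 and the usual 2.4 default of age_buffer = 30*HZ; check bdf_prm in your tree):
#include <stdio.h>

#define HZ 100               /* assumption: typical 2.4 x86 tick rate */
#define AGE_BUFFER (30 * HZ) /* assumption: 2.4 default bdf_prm age_buffer */

int main(void)
{
    unsigned long jiffies = 123456; /* pretend current tick counter */
    unsigned long b_flushtime = jiffies + AGE_BUFFER;

    /* the buffer becomes "old enough" for writeback 3000 ticks (30 s) later */
    printf("flush deadline: tick %lu\n", b_flushtime);
    return 0;
}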
void refile_buffer(struct buffer_head *bh)
{
/*
 * For a buffer in use, bh->b_list holds its index into lru_list[],
 * i.e. the list that reflects its state (e.g. BUF_CLEAN = 0).
 * The lru_list[] pointer array stores the head of each list;
 * the lru_list_lock spin lock protects these lists against concurrent
 * access on multiprocessor systems.
 */
spin_lock(&lru_list_lock);
__refile_buffer(bh);
spin_unlock(&lru_list_lock);
}
/*
* A buffer may need to be moved from one buffer list to another
* (e.g. in case it is not shared any more). Handle this.
*/
/*
 * Move the buffer to the list matching its BH_Dirty and BH_Lock flags.
 * b_list records which lru_list the bh currently sits on, so if the
 * computed dispose differs from b_list the bh is unlinked from its old
 * lru_list and inserted into the new one. If the new list is BUF_CLEAN,
 * remove_inode_queue() must also drop the bh from the inode's
 * dirty-buffer list (i_dirty_buffers).
 */
static void __refile_buffer(struct buffer_head *bh)
{
int dispose = BUF_CLEAN;
if (buffer_locked(bh))
dispose = BUF_LOCKED;
if (buffer_dirty(bh))
dispose = BUF_DIRTY;
if (dispose != bh->b_list) {
__remove_from_lru_list(bh);
bh->b_list = dispose;
if (dispose == BUF_CLEAN)
/* drop the bh from the inode's dirty-buffer lists */
remove_inode_queue(bh);
__insert_into_lru_list(bh, dispose);
}
}
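The priority of the flags matters for the questions raised later in write_some_buffers(). This little user-space sketch mirrors the decision above (the list indices are an assumption matching 2.4; verify against include/linux/fs.h):
#include <stdio.h>

enum { BUF_CLEAN, BUF_LOCKED, BUF_DIRTY }; /* assumed 2.4 lru_list indices */

/* mirror of __refile_buffer()'s choice: dirty outranks locked */
static int pick_list(int locked, int dirty)
{
    int dispose = BUF_CLEAN;
    if (locked)
        dispose = BUF_LOCKED;
    if (dirty)
        dispose = BUF_DIRTY;
    return dispose;
}

int main(void)
{
    /* a buffer that is both locked and dirty still lands on BUF_DIRTY */
    printf("locked+dirty -> list %d (BUF_DIRTY)\n", pick_list(1, 1));
    /* once BH_Dirty is cleared, the same locked buffer refiles to BUF_LOCKED */
    printf("locked+clean -> list %d (BUF_LOCKED)\n", pick_list(1, 0));
    return 0;
}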
***********************************************
/*
* This is the actual bdflush daemon itself. It used to be started from
* the syscall above, but now we launch it ourselves internally with
* kernel_thread(...) directly after the first thread in init/main.c
*/
int bdflush(void *startup)
{
struct task_struct *tsk = current;
/*
* We have a bare-bones task_struct, and really should fill
* in a few more things so "top" and /proc/2/{exe,root,cwd}
* display semi-sane things. Not real crucial though...
*/
set_special_pids(1, 1);
strcpy(tsk->comm, "bdflush");
/* avoid getting signals */
spin_lock_irq(&tsk->sighand->siglock);
flush_signals(tsk);
sigfillset(&tsk->blocked);
recalc_sigpending_tsk(tsk);
spin_unlock_irq(&tsk->sighand->siglock);
complete((struct completion *)startup);
/*
* set up the timer
*/
init_timer(&bdflush_timer);
bdflush_timer.function = bdflush_timeout;
bdflush_timer.expires = jiffies + HZ/50;
add_timer(&bdflush_timer);
/*
* FIXME: The ndirty logic here is wrong. It's supposed to
* send bdflush back to sleep after writing ndirty buffers.
* In fact, the test is wrong so bdflush will in fact
* sleep when bdflush_stop() returns true.
*
* FIXME: If it proves useful to implement ndirty properly,
* then perhaps the value of ndirty should be scaled by the
* amount of memory in the machine.
*/
for (;;) {
int ndirty = bdf_prm.b_un.ndirty;
CHECK_EMERGENCY_SYNC
del_timer(&bdflush_timer);
clear_bit(0, &bdflush_needs_waking);
while (ndirty > 0) {
/* take the lru_list spin lock */
spin_lock(&lru_list_lock);
/* Try to start block I/O writes for up to 32 unlocked dirty buffers.
 * Author's question: where does the NODEV passed here come from?
 * Answer: NODEV is defined as 0 in include/linux/kdev_t.h; passing it
 * means "no particular device", so the device filter inside
 * write_some_buffers() is skipped and dirty buffers of every device
 * are eligible.
 */
if (!write_some_buffers(NODEV))
break;
ndirty -= NRSYNC;
}
if (ndirty > 0 || bdflush_stop()) {
run_task_queue(&tq_disk);
interruptible_sleep_on(&bdflush_wait);
}
}
}
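The countdown in the inner loop is simple arithmetic; a sketch assuming the usual 2.4 default of ndirty = 500 (check bdf_prm in your tree):
#include <stdio.h>

#define NRSYNC 32 /* buffers submitted per write_some_buffers() round */

int main(void)
{
    int ndirty = 500; /* assumption: 2.4 default bdf_prm.b_un.ndirty */
    int rounds = 0;

    while (ndirty > 0) { /* same shape as bdflush's inner loop */
        ndirty -= NRSYNC;
        rounds++;
    }
    /* 16 rounds x 32 = 512 >= 500, so ndirty ends at -12 */
    printf("rounds = %d, final ndirty = %d\n", rounds, ndirty);
    return 0;
}
In the real loop write_some_buffers() can also return 0 early (fewer than 32 candidates left), breaking out before ndirty is used up; bdflush then kicks tq_disk and goes back to sleep.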
/*
* Write some buffers from the head of the dirty queue.
*
* This must be called with the LRU lock held, and will
* return without it!
*/
/*
 * Try to start write I/O on up to 32 unlocked dirty buffers.
 * Once the writes are submitted the lru_list_lock is released.
 * Returns 0 if fewer than 32 unlocked dirty buffers were found;
 * otherwise returns a negative value (-EAGAIN).
 */
#define NRSYNC (32)
static int write_some_buffers(kdev_t dev)
{
struct buffer_head *next;
struct buffer_head *array[NRSYNC];
unsigned int count;
int nr;
/* next gets the head of the BUF_DIRTY list, nr its length */
next = lru_list[BUF_DIRTY];
nr = nr_buffers_type[BUF_DIRTY];
count = 0;
while (next && --nr >= 0) {
struct buffer_head * bh = next;
next = bh->b_next_free; /* next buffer on the list */
/* dev is the NODEV passed in from bdflush above; a real device
 * would restrict the scan to that device's buffers */
if (dev != NODEV && bh->b_dev != dev)
continue;
/* Lock the buffer we are about to write (buffers taken off
 * lru_list[BUF_DIRTY] should all have BH_Dirty set).
 * test_and_set_bit() returns the previous value, so this branch is
 * taken when the buffer was already locked by someone else.
 * Author's question: does __refile_buffer() put it on the locked or
 * the dirty list? In __refile_buffer() BH_Dirty takes priority over
 * BH_Lock, so a buffer that is still dirty stays on
 * lru_list[BUF_DIRTY]; the refile here mainly catches buffers that
 * another process cleaned before we took the spin lock, moving them
 * off the dirty list.
 */
if (test_and_set_bit(BH_Lock, &bh->b_state)) {
__refile_buffer(bh);
continue;
}
/* Clear BH_Dirty, marking the buffer clean; the atomic operation
 * returns the old value, so this branch runs only if the buffer
 * really was dirty.
 * At this point BH_Dirty is clear and BH_Lock is set, so
 * __refile_buffer() moves the bh to lru_list[BUF_LOCKED]; the buffers
 * selected for writeback are then collected into array[].
 */
if (atomic_set_buffer_clean(bh)) {
__refile_buffer(bh);
/*
 * get_bh() raises the reference count b_count so the buffer head
 * cannot be freed or reused while the I/O is in flight; once the
 * operation completes, put_bh() must drop the reference again.
 */
get_bh(bh);
array[count++] = bh;
/* fewer than 32 collected so far: keep scanning */
if (count < NRSYNC)
continue;
/* a full batch of 32: release the spin lock and submit them */
spin_unlock(&lru_list_lock);
write_locked_buffers(array, count);
conditional_schedule();
return -EAGAIN;
}
/* the buffer was not actually dirty: unlock it and refile it */
unlock_buffer(bh);
__refile_buffer(bh);
} /* end of the while loop */
/*
 * If the while loop ends with 0 < count < 32, the partial batch was
 * never submitted inside the loop, so it is submitted here: release
 * the spin lock first, then hand the remaining buffers to
 * write_locked_buffers(). That is why write_locked_buffers() appears
 * again below. Returning 0 tells the caller that fewer than NRSYNC
 * unlocked dirty buffers were found, so an immediate retry is not
 * worthwhile.
 */
spin_unlock(&lru_list_lock);
if (count)
write_locked_buffers(array, count);
return 0;
}
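For comparison, the lock contract (entered with lru_list_lock held, returned without it) is what lets other 2.4 callers drive write_some_buffers() in a simple retry loop. Quoted from memory of fs/buffer.c, so details may differ between 2.4 versions:
static void write_unlocked_buffers(kdev_t dev)
{
    do {
        spin_lock(&lru_list_lock);     /* retaken before every round */
    } while (write_some_buffers(dev)); /* nonzero (-EAGAIN) means "more to do" */
    run_task_queue(&tq_disk);          /* kick the queued block I/O */
}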
/*
* The buffers have been marked clean and locked. Just submit the dang
* things..
*/
/*
 * Install end_buffer_io_sync() as the I/O-completion callback,
 * then submit each buffer with submit_bh().
 */
static void write_locked_buffers(struct buffer_head **array, unsigned int count)
{
do {
struct buffer_head * bh = *array++;
bh->b_end_io = end_buffer_io_sync;
submit_bh(WRITE, bh);
} while (--count);
}