int generic_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { struct inode *inode = page->mapping->host;
/* *求出写入后文件总字节数 */ loff_t pos = ((loff_t)page->index
__block_commit_write(inode,page,from,to);
/*释放高端内存*/ kunmap(page);
/* *检查写入后是否扩大了文件 *如果总字节数>文件的字节数,更新inode->i_size字段 *并把inode标记为脏 */ if (pos > inode->i_size) { inode->i_size = pos; mark_inode_dirty(inode); } return 0; }
/*
 * Walk the buffers attached to @page and commit the range [from, to):
 * buffers overlapping the written range become uptodate and dirty
 * (newly-dirtied ones are queued on the inode's dirty-data list); if
 * every buffer on the page ends up uptodate, the page itself is marked
 * uptodate so a later read can skip a needless readpage().
 */
static int __block_commit_write(struct inode *inode, struct page *page,
		unsigned from, unsigned to)
{
	unsigned block_start, block_end;
	int partial = 0, need_balance_dirty = 0;
	unsigned blocksize;
	struct buffer_head *bh, *head;

	blocksize = 1 << inode->i_blkbits;

	/*
	 * Iterate over the circular buffer list of the page;
	 * block_start/block_end track each buffer's byte range in the page.
	 */
	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			/*
			 * Buffer lies entirely outside the written range:
			 * if it is not uptodate the page cannot be either.
			 */
			if (!buffer_uptodate(bh))
				partial = 1;
		} else {
			/*
			 * Buffer overlaps [from, to): it now holds valid data,
			 * so set BH_Uptodate (the data is not on disk yet, but
			 * will be shortly).  atomic_set_buffer_dirty() sets
			 * BH_Dirty and returns its previous value:
			 *  - already dirty: nothing more to do;
			 *  - clean -> dirty: __mark_dirty() refiles the buffer
			 *    onto the BUF_DIRTY LRU list, it is linked onto the
			 *    inode's dirty-data queue, and we remember to run
			 *    balance_dirty() below.
			 */
			set_bit(BH_Uptodate, &bh->b_state);
			if (!atomic_set_buffer_dirty(bh)) {
				__mark_dirty(bh);
				buffer_insert_inode_data_queue(bh, inode);
				need_balance_dirty = 1;
			}
		}
	}

	/*
	 * If buffers went clean -> dirty, balance_dirty() decides (via
	 * balance_dirty_state()) whether to leave bdflush asleep (-1),
	 * wake it asynchronously (0), or wake it and block this process
	 * until flushing makes progress (1).
	 */
	if (need_balance_dirty)
		balance_dirty();
	/*
	 * is this a partial write that happened to make all buffers
	 * uptodate then we can optimize away a bogus readpage() for
	 * the next read(). Here we 'discover' wether the page went
	 * uptodate as a result of this (potentially partial) write.
	 */
	if (!partial)
		SetPageUptodate(page);
	return 0;
}
/* ******************************************************************* */
/*
 * Stamp the buffer with its write-back deadline (b_flushtime = now +
 * age_buffer) and call refile_buffer() to move it onto the appropriate
 * lru_list — for a newly-dirtied buffer that is BUF_DIRTY.
 */
inline void __mark_dirty(struct buffer_head *bh) { bh->b_flushtime = jiffies + bdf_prm.b_un.age_buffer; refile_buffer(bh); }
/*
 * Locked wrapper around __refile_buffer().  bh->b_list is the index of
 * the lru_list[] this buffer currently lives on (e.g. BUF_CLEAN = 0);
 * lru_list[] holds the head of each list, and lru_list_lock protects
 * those list heads from concurrent access on SMP.
 */
void refile_buffer(struct buffer_head *bh) { spin_lock(&lru_list_lock); __refile_buffer(bh); spin_unlock(&lru_list_lock); }
/*
 * A buffer may need to be moved from one buffer list to another
 * (e.g. in case it is not shared any more). Handle this.
 */
/*
 * Pick the destination LRU list from the BH_Dirty/BH_Lock flags
 * (dirty takes precedence over locked, else clean).  If that differs
 * from the list recorded in bh->b_list, unlink the buffer from its old
 * lru_list and insert it into the new one.  When the buffer becomes
 * clean it is also removed from the inode's dirty-buffer queue
 * (i_dirty_buffers / i_dirty_data_buffers) via remove_inode_queue().
 * Caller must hold lru_list_lock.
 */
static void __refile_buffer(struct buffer_head *bh) { int dispose = BUF_CLEAN; if (buffer_locked(bh)) dispose = BUF_LOCKED; if (buffer_dirty(bh)) dispose = BUF_DIRTY; if (dispose != bh->b_list) { __remove_from_lru_list(bh); bh->b_list = dispose; if (dispose == BUF_CLEAN) /* no longer dirty: drop it from the inode's dirty queue */ remove_inode_queue(bh);
__insert_into_lru_list(bh, dispose); } }
/* ******************************************************************* */
/*
 * This is the actual bdflush daemon itself. It used to be started from
 * the syscall above, but now we launch it ourselves internally with
 * kernel_thread(...) directly after the first thread in init/main.c
 */
int bdflush(void *startup)
{
	struct task_struct *tsk = current;

	/*
	 * We have a bare-bones task_struct, and really should fill
	 * in a few more things so "top" and /proc/2/{exe,root,cwd}
	 * display semi-sane things. Not real crucial though...
	 */

	set_special_pids(1, 1);
	strcpy(tsk->comm, "bdflush");

	/* avoid getting signals */
	spin_lock_irq(&tsk->sighand->siglock);
	flush_signals(tsk);
	sigfillset(&tsk->blocked);
	recalc_sigpending_tsk(tsk);
	spin_unlock_irq(&tsk->sighand->siglock);

	/* Tell whoever spawned us that initialization is complete. */
	complete((struct completion *)startup);

	/*
	 * set up the timer
	 */
	init_timer(&bdflush_timer);
	bdflush_timer.function = bdflush_timeout;
	bdflush_timer.expires = jiffies + HZ/50;
	add_timer(&bdflush_timer);

	/*
	 * FIXME: The ndirty logic here is wrong.  It's supposed to
	 * send bdflush back to sleep after writing ndirty buffers.
	 * In fact, the test is wrong so bdflush will in fact
	 * sleep when bdflush_stop() returns true.
	 *
	 * FIXME: If it proves useful to implement ndirty properly,
	 * then perhaps the value of ndirty should be scaled by the
	 * amount of memory in the machine.
	 */
	for (;;) {
		int ndirty = bdf_prm.b_un.ndirty;

		CHECK_EMERGENCY_SYNC

		del_timer(&bdflush_timer);
		clear_bit(0, &bdflush_needs_waking);

		while (ndirty > 0) {
			/* Take the LRU lock; write_some_buffers() drops it. */
			spin_lock(&lru_list_lock);
			/*
			 * Try to start write-out for up to NRSYNC unlocked
			 * dirty buffers; NODEV means "buffers of any device".
			 * NOTE(review): the original annotation asked where
			 * NODEV is defined — presumably <linux/kdev_t.h>;
			 * confirm against the kernel headers.
			 */
			if (!write_some_buffers(NODEV))
				break;
			ndirty -= NRSYNC;
		}
		if (ndirty > 0 || bdflush_stop()) {
			run_task_queue(&tq_disk);
			interruptible_sleep_on(&bdflush_wait);
		}
	}
}
/* * Write some buffers from the head of the dirty queue. * * This must be called with the LRU lock held, and will * return without it! */ /* * 为多达32个 未加锁 脏 缓冲区试图激活块I/O写操作. * 一旦写操作激活,释放lru_list_lock自旋锁 * 如果找到的 未加锁 脏缓冲区小于32个,则返回0; * 否则返回一个负值 */ #define NRSYNC (32) static int write_some_buffers(kdev_t dev) { struct buffer_head *next; struct buffer_head *array[NRSYNC]; unsigned int count; int nr;
/*BUF_DIRTY队列头赋给next,队列长度赋给nr*/ next = lru_list[BUF_DIRTY]; nr = nr_buffers_type[BUF_DIRTY]; count = 0;
while (next && --nr >= 0) { struct buffer_head * bh = next; next = bh->b_next_free;/*下一个buffer*/ /*dev是从上面传入的NODEV,在这里比较系统的NODEV*/ if (dev != NODEV && bh->b_dev != dev) continue;
/*锁定将要传送的块(这里的块从lru_list[BUF_DIRTY]取出的, *应该均设置了BH_Dirty位) *返回原值,如果是'未加锁'->'加锁' *重新确定bh在lru_list中的位置,这里放到locked链表还是dirty链表呢??? *按照__refile_buffer(bh)对标志的优先值,如果dirty和locked标志都 *设置的时候dispose = BH_Dirty,那么BH_Dirty为先 *那么就还是lru_list[BUF_DIRTY]中了??? *这里__refile_buffer的作用是什么呢???只是为了清除可能在自旋锁 *未加前被其他进程又设置成clean的bh? */ if (test_and_set_bit(BH_Lock, &bh->b_state)) { __refile_buffer(bh); continue; }
/*清除BH_Dirty标志,将bh标记为干净的 *返回原值,如果原值是'脏'的 *重新确定bh在lru_list中的位置 *那么这里原来BH_Dirty和BH_Locked都设置了的bh将BH_Dirty标志清除了 *即这些脏bh已经被选定写回设备,则会被放入lru_list[BH_LOCKED]链表, *并将这些bh放入array[]数组 */ if (atomic_set_buffer_clean(bh)) { __refile_buffer(bh); /* * 增加缓冲区头的引用计数b_count,确保该缓冲区头不会再被分配出去; * 当完成对缓冲区头的操作之后,还必须使用put_bh()函数减少引用计数 */ get_bh(bh); array[count++] = bh; /*不足32个,继续*/ if (count continue;
/*有32个满足条件的bh了,释放自旋锁,正式提交*/ spin_unlock(&lru_list_lock); write_locked_buffers(array, count); conditional_schedule(); return -EAGAIN; }
/*清bh的locked等标志(具体见下面函数解释),并重新链入lru_list*/ unlock_buffer(bh); __refile_buffer(bh); } //while循环
/* *这里是不是在上面while循环中count不足32个后就没有交入 *write_locked_buffers让count递减 *则count是一个不足32的值,然后释放自旋锁 *??????????但是为什么下面又要提交给write_locked_buffers呢???? */ spin_unlock(&lru_list_lock);
if (count) write_locked_buffers(array, count); return 0; }
/*
 * The buffers have been marked clean and locked. Just submit the dang
 * things..
 */
/*
 * Attach the synchronous-I/O completion handler to each buffer head in
 * @array and submit it for writing.  @count must be at least 1.
 */
static void write_locked_buffers(struct buffer_head **array, unsigned int count)
{
	struct buffer_head *bh;

	do {
		bh = *array;
		array++;
		/* end_buffer_io_sync runs when the write completes. */
		bh->b_end_io = end_buffer_io_sync;
		submit_bh(WRITE, bh);
	} while (--count);
}