generic_commit_write解析-铃溪-ChinaUnix博客

铃溪的ChinaUnix博客

首页　| 　博文目录　| 　关于我

铃溪

博客访问： 99223
博文数量： 29
博客积分： 0
博客等级：民兵
技术积分： 10
用户组：普通用户
注册时间： 2014-09-01 19:43

文章分类

全部博文（29）

linux源码（28）

内核proc文件系统（4）
未分配的博文（1）

文章存档

2017年（4）

2015年（24）

2014年（1）

我的朋友

相关博文

generic_commit_write解析

分类： LINUX

2015-07-11 13:56:35

原文地址：generic_commit_write解析　作者：zixin

int generic_commit_write(struct file *file, struct page *page,

unsigned from, unsigned to)

{

struct inode *inode = page->mapping->host;

*求出写入后文件总字节数

loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;

__block_commit_write(inode,page,from,to);

/*释放高端内存*/

kunmap(page);

*检查写入后是否扩大了文件

*如果总字节数>文件的字节数，更新inode->i_size字段

*并把inode标记为脏

if (pos > inode->i_size) {

inode->i_size = pos;

mark_inode_dirty(inode);

}

return 0;

}

static int __block_commit_write(struct inode *inode, struct page *page,

unsigned from, unsigned to)

{

unsigned block_start, block_end;

int partial = 0, need_balance_dirty = 0;

unsigned blocksize;

struct buffer_head *bh, *head;

/*块大小*/

blocksize = 1 << inode->i_blkbits;

*对页中每个buffer对应的bh

*block_start记录循环写入的总块大小

for(bh = head = page->buffers, block_start = 0;

bh != head || !block_start;

block_start=block_end, bh = bh->b_this_page) {

block_end = block_start + blocksize;

if (block_end <= from || block_start >= to) {

if (!buffer_uptodate(bh))

partial = 1;

} else {

*如果缓冲区与写入的范围(from与to)相交

*设置BH_Uptodate标志，即"一致"(其实buffer内容还未写入设备，但马上

*就会完成)

*atomic_set_buffer_dirty(bh)重置BH_Dirty标志位，该原子操作将返回

*BH_Dirty标志位的原有值BH_Dirty

*如果以前就为脏就不需要任何额外的操作函数就可以直接返回

*如果该缓冲区原来是"干净"的：从"干净" ——> "脏"

* 调用__mark_dirty根据BH_Dirty和BH_Lock标志的值把缓冲区移到适当

*的链表中(这里是插入BUF_DIRTY链表)

* 调用buffer_insert_inode_data_queue插入inode的脏缓冲区链表(文件

* 数据的那一个)

* need_balance_dirty = 1

set_bit(BH_Uptodate, &bh->b_state);

if (!atomic_set_buffer_dirty(bh)) {

__mark_dirty(bh);

buffer_insert_inode_data_queue(bh, inode);

need_balance_dirty = 1;

}

/*如果bh是从"干净" ——> "脏"

*调用balance_dirty检查是否积累足够的"脏"页面(确认在BU)

*计算由balance_dirty_state()协助函数完成

*协助函数返回 -1 "脏"页面不足不唤醒bdflush

*协助函数返回 0 "脏"页面足够，唤醒bdflush，异步刷新

*协助函数返回 1 "脏"页面太多，唤醒bdflush，阻塞当前进程进行刷新

if (need_balance_dirty)

balance_dirty();

* is this a partial write that happened to make all buffers

* uptodate then we can optimize away a bogus readpage() for

* the next read(). Here we 'discover' wether the page went

* uptodate as a result of this (potentially partial) write.

if (!partial)

SetPageUptodate(page);

return 0;

}

/***********************************************
 *更新bh对象的b_flushtime成员的值，以确定该脏缓冲区回写磁盘的时间期限；
 *调用refile_buffer()函数，将该bh对象移到新的lru_list链表中（在这里就是移到BUF_DIRTY链表中）
 */

/*
 * __mark_dirty - stamp a buffer with its flush deadline and re-file it.
 * @bh: buffer head to update
 *
 * The deadline is the current jiffies value plus the age_buffer setting
 * from bdf_prm; refile_buffer() then moves the buffer to the LRU list that
 * matches its state.
 */
inline void __mark_dirty(struct buffer_head *bh)
{
	const unsigned long deadline = jiffies + bdf_prm.b_un.age_buffer;

	bh->b_flushtime = deadline;
	refile_buffer(bh);
}

/*
 * refile_buffer - locked wrapper around __refile_buffer().
 * @bh: buffer head to move to the LRU list matching its current state
 */
void refile_buffer(struct buffer_head *bh)
{
	/*
	 * For a buffer head that is in use, bh->b_list is the index into
	 * lru_list[] of the list it currently sits on, i.e. it reflects the
	 * buffer's state (e.g. BUF_CLEAN).  lru_list[] holds a pointer to
	 * the first element of each list.  The spinlock protects these
	 * lists against concurrent access on multiprocessor systems.
	 */
	spin_lock(&lru_list_lock);
	__refile_buffer(bh);
	spin_unlock(&lru_list_lock);
}

/*
 * A buffer may need to be moved from one buffer list to another
 * (e.g. in case it is not shared any more). Handle this.
 *
 *根据BH_Dirty和BH_Lock标志的值把缓冲区移到适当的链表中
 *由于b_list值表示该bh对象当前所处的lru_list链表，
 *因此如果dispose的值与b_list的值不相等，
 *则需要将该bh对象从原来的lru_list链表中摘除，然后将它插入到新的lru_list链表中；
 *且如果新lru_list链表是BUF_CLEAN链表，
 *则还需要调用remove_inode_queue()函数将该bh对象
 *从相应inode的脏缓冲区链表i_dirty_buffers中删除
 */

/*
 * __refile_buffer - move a buffer to the LRU list matching its state bits.
 * @bh: buffer head to re-file
 *
 * The target list is BUF_CLEAN by default, BUF_LOCKED if the buffer is
 * locked and BUF_DIRTY if it is dirty; the dirty test comes last, so a
 * buffer that is both locked and dirty goes to BUF_DIRTY.  Nothing happens
 * when the buffer already sits on the target list (bh->b_list).
 *
 * refile_buffer() calls this with lru_list_lock held.
 */
static void __refile_buffer(struct buffer_head *bh)
{
	int dispose = BUF_CLEAN;

	if (buffer_locked(bh))
		dispose = BUF_LOCKED;
	if (buffer_dirty(bh))
		dispose = BUF_DIRTY;
	if (dispose != bh->b_list) {
		__remove_from_lru_list(bh);
		bh->b_list = dispose;
		/*
		 * A buffer that became clean is also taken off the dirty
		 * buffer queue of its inode (per the original annotation the
		 * inode keeps two doubly linked lists of dirty buffers).
		 */
		if (dispose == BUF_CLEAN)
			remove_inode_queue(bh);
		__insert_into_lru_list(bh, dispose);
	}
}

/***********************************************
 * This is the actual bdflush daemon itself. It used to be started from
 * the syscall above, but now we launch it ourselves internally with
 * kernel_thread(...) directly after the first thread in init/main.c
 */

int bdflush(void *startup)

{

struct task_struct *tsk = current;

* We have a bare-bones task_struct, and really should fill

* in a few more things so "top" and /proc/2/{exe,root,cwd}

* display semi-sane things. Not real crucial though...

set_special_pids(1, 1);

strcpy(tsk->comm, "bdflush");

/* avoid getting signals */

spin_lock_irq(&tsk->sighand->siglock);

flush_signals(tsk);

sigfillset(&tsk->blocked);

recalc_sigpending_tsk(tsk);

spin_unlock_irq(&tsk->sighand->siglock);

complete((struct completion *)startup);

* set up the timer

init_timer(&bdflush_timer);

bdflush_timer.function = bdflush_timeout;

bdflush_timer.expires = jiffies + HZ/50;

add_timer(&bdflush_timer);

* FIXME: The ndirty logic here is wrong. It's supposed to

* send bdflush back to sleep after writing ndirty buffers.

* In fact, the test is wrong so bdflush will in fact

* sleep when bdflush_stop() returns true.

* FIXME: If it proves useful to implement ndirty properly,

* then perhaps the value of ndirty should be scaled by the

* amount of memory in the machine.

for (;;) {

int ndirty = bdf_prm.b_un.ndirty;

CHECK_EMERGENCY_SYNC

del_timer(&bdflush_timer);

clear_bit(0, &bdflush_needs_waking);

while (ndirty > 0) {

/*给lru_list上自旋锁*/

spin_lock(&lru_list_lock);

/*为多达32个未加锁脏缓冲区试图激活块I/O写操作

*问题：这里指定的块设备的NODEV是在哪里定义传入的？？

*在include/linux/kdev中初始定义了#define 0

* 那么在哪里重新定义了呢？

if (!write_some_buffers(NODEV))

break;

ndirty -= NRSYNC;

}

if (ndirty > 0 || bdflush_stop()) {

run_task_queue(&tq_disk);

interruptible_sleep_on(&bdflush_wait);

}

/*
 * Write some buffers from the head of the dirty queue.
 *
 * This must be called with the LRU lock held, and will
 * return without it!
 *
 * 为多达32个未加锁脏缓冲区试图激活块I/O写操作.
 * 一旦写操作激活，释放lru_list_lock自旋锁
 * 如果找到的未加锁脏缓冲区小于32个，则返回0；
 * 否则返回一个负值
 */

/* Maximum number of buffers collected and submitted in one batch. */
#define NRSYNC (32)

/*
 * write_some_buffers - start write-out for up to NRSYNC dirty buffers.
 * @dev: only buffers of this device are considered; NODEV disables the
 *       filter
 *
 * Entered with lru_list_lock held (bdflush() takes it right before the
 * call); the lock is dropped on every return path.
 *
 * Returns -EAGAIN after a full batch of NRSYNC buffers has been submitted,
 * or 0 once the dirty list has been walked without filling a batch.
 */
static int write_some_buffers(kdev_t dev)
{
	struct buffer_head *next;
	struct buffer_head *array[NRSYNC];
	unsigned int count;
	int nr;

	/* Start at the head of the BUF_DIRTY list; nr is that list's length. */
	next = lru_list[BUF_DIRTY];
	nr = nr_buffers_type[BUF_DIRTY];
	count = 0;
	while (next && --nr >= 0) {
		struct buffer_head * bh = next;
		next = bh->b_next_free;	/* next buffer on the list */

		/* With dev == NODEV (as passed by bdflush) every device matches. */
		if (dev != NODEV && bh->b_dev != dev)
			continue;

		/*
		 * Lock the buffer that is about to be written (it was taken
		 * from lru_list[BUF_DIRTY], so BH_Dirty should be set).
		 * test_and_set_bit() returns the previous value: non-zero
		 * means the buffer was already locked, so skip it.
		 *
		 * NOTE (question from the original annotation): what does
		 * __refile_buffer() achieve here?  In __refile_buffer() the
		 * dirty test is applied after the locked test, so a buffer
		 * that is locked and still dirty stays on BUF_DIRTY; the
		 * call only moves the buffer when its state no longer
		 * matches bh->b_list (e.g. it has been cleaned in the
		 * meantime).
		 */
		if (test_and_set_bit(BH_Lock, &bh->b_state)) {
			__refile_buffer(bh);
			continue;
		}

		/*
		 * Clear BH_Dirty; atomic_set_buffer_clean() returns the
		 * previous value.  If the buffer was dirty it is now locked
		 * and clean, i.e. selected for write-back: __refile_buffer()
		 * moves it to lru_list[BUF_LOCKED] and it is queued in
		 * array[].
		 */
		if (atomic_set_buffer_clean(bh)) {
			__refile_buffer(bh);
			/*
			 * Take a reference (b_count) so that the buffer head
			 * is not handed out again; per the original
			 * annotation it must be dropped later with put_bh().
			 */
			get_bh(bh);
			array[count++] = bh;
			/* Fewer than NRSYNC collected so far: keep scanning. */
			if (count < NRSYNC)
				continue;

			/* Batch is full: drop the spinlock and submit it. */
			spin_unlock(&lru_list_lock);
			write_locked_buffers(array, count);
			/* NOTE(review): presumably a voluntary reschedule point -- helper not shown. */
			conditional_schedule();
			return -EAGAIN;
		}

		/*
		 * The buffer was not dirty after all: release the lock taken
		 * above and put it back on the list matching its state.
		 */
		unlock_buffer(bh);
		__refile_buffer(bh);
	}

	/*
	 * The list was exhausted before a full batch was collected, so
	 * count < NRSYNC here.  The buffers already stored in array[] have
	 * been locked, marked clean and referenced but not yet submitted --
	 * that is why the remainder still has to be handed to
	 * write_locked_buffers() after the spinlock is dropped (this answers
	 * the question raised in the original annotation).
	 */
	spin_unlock(&lru_list_lock);

	if (count)
		write_locked_buffers(array, count);
	return 0;
}

/*
 * The buffers have been marked clean and locked. Just submit the dang
 * things..
 *
 *安装IO操作结束的通知函数
 *submit_bh提交
 */

/*
 * write_locked_buffers - submit a batch of buffers for writing.
 * @array: buffer heads to submit
 * @count: number of entries in @array; the loop body runs before the count
 *         is tested, and both callers in this file pass a non-zero count
 *
 * Each buffer gets end_buffer_io_sync installed as its I/O completion
 * handler and is then passed to submit_bh() as a WRITE.
 */
static void write_locked_buffers(struct buffer_head **array, unsigned int count)
{
	struct buffer_head **cursor = array;
	unsigned int remaining = count;

	do {
		struct buffer_head *bh = *cursor;

		cursor++;
		bh->b_end_io = end_buffer_io_sync;
		submit_bh(WRITE, bh);
		remaining--;
	} while (remaining != 0);
}

阅读(1546) | 评论(0) | 转发(0) |

上一篇：缓冲区首部结构

下一篇：block_prepare_write解析

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6