一、页高速缓存中涉及的数据结构
1、页高速缓存的核心数据结构address_space对象,它是一个嵌入在页所有者的索引节点对象的数据结构。高速缓存中的许多页可能属于同一个所有者,从而可以链接到同一个address_space对象
-
struct address_space {
-
struct inode *host; /* 指向拥有该对象的索引节点的指针 */
-
struct radix_tree_root page_tree; /* 拥有者页的基树的根*/
-
rwlock_t tree_lock; /* 保护基树的自旋锁 */
-
unsigned int i_mmap_writable;/* 地址空间共享内存映射的个数 */
-
struct prio_tree_root i_mmap;
-
struct list_head i_mmap_nonlinear;
-
spinlock_t i_mmap_lock;
-
unsigned int truncate_count;
-
unsigned long nrpages; /* 所有者的总页数 */
-
pgoff_t writeback_index;
-
struct address_space_operations *a_ops; /* 对页操作的方法 */
-
unsigned long flags;
-
struct backing_dev_info *backing_dev_info;
-
spinlock_t private_lock;
-
struct list_head private_list;
-
struct address_space *assoc_mapping;
-
};
2、基树
radix_tree_root数据结构:
-
struct radix_tree_root {
-
unsigned int height;
-
int gfp_mask;
-
struct radix_tree_node *rnode;
-
};
radix_tree_node数据结构:
-
struct radix_tree_node {
-
unsigned int count;
-
void *slots[RADIX_TREE_MAP_SIZE];
-
unsigned long tags[RADIX_TREE_TAGS][RADIX_TREE_TAG_LONGS];
-
};
内核中的基树结构:
总结:address_space,inode,基树以及页描述符之间的关系
1、每个adrres_space对象
对应一颗搜索树。
他们之间的联系是通过address_space对象中的page_tree字段指向该address_space对象对应的基树。
2、一个inode节点对应一个address_space对象,
其中inode节点对象的i_mapping和i_data字段指向相应的 address_space对象,而address_space对象的host字段指向对应的inode节点对象。
3、
)一般情况下一个inode节点对象对应的文件或者是块设备都会包含多个页面的内容,所以一个inode对象对应多个page描述符。同一个文件拥有的所有page描述符都可以在该文件对应的基树中找到。
它们之间的关系如下图:
二、页高速缓存的处理函数
对页高速缓存操作的基本高级函数有查找、添加和删除页。
-
#ifdef __KERNEL__
-
#define RADIX_TREE_MAP_SHIFT 6
-
#else
-
#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */
-
#endif
-
#define RADIX_TREE_TAGS 2
-
-
#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
-
#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)
-
-
#define RADIX_TREE_TAG_LONGS \
-
((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
2.1 查找页
查找页主要是通过find_get_page()接受参数为指向address_space对象的指针和偏移量,具体代码如下:
-
struct page * find_get_page(struct address_space *mapping, unsigned long offset)
-
{
-
struct page *page;
-
-
read_lock_irq(&mapping->tree_lock);
-
page = radix_tree_lookup(&mapping->page_tree, offset);
-
if (page)
-
page_cache_get(page); //增加该页的使用计数器
-
read_unlock_irq(&mapping->tree_lock);
-
return page;
-
}
该函数由radix_tree_lookup()和page_cache_get()函数实现,首先看一下radix_tree_lookup()的代码:
-
void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
-
{
-
unsigned int height, shift;
-
struct radix_tree_node **slot;
-
-
height = root->height;
-
if (index > radix_tree_maxindex(height)) //索引是否大于当前深度对应的最大索引值
-
return NULL;
-
-
shift = (height-1) * RADIX_TREE_MAP_SHIFT;
-
slot = &root->rnode;
-
-
while (height > 0) {
-
if (*slot == NULL)
-
return NULL;
-
-
slot = (struct radix_tree_node **)
-
((*slot)->slots +
-
((index >> shift) & RADIX_TREE_MAP_MASK));
-
shift -= RADIX_TREE_MAP_SHIFT;
-
height--;
-
}
-
-
return *slot;
-
}
该函数主要是找到基树中与index对应的页描述符,并返回指向该页描述符的地址。
对于基树查找的说明:对于一个index比如131,其二进制表示为:000010 000011,从低位到高位以每六位为一组表示一层。最低六位表示page在最下面一层的位置,这里为000011表示最下面一层的slots下标为3表示的page。次低六位表示的是次下面一层的位置。例如图
从上图看:首先根据000010表示要找的页在根节点的下一层中的第三个位置即slots[2]表示的子树下面。因此我们顺着slots[2]的子树,再根据000011表示要找的页在此层中第四个位置即slots[3]下面。然后在顺着slots[3]就可以找到我们index=131的page了。
有了上面的基础就很容易的理解slot = (struct radix_tree_node **)((*slot)->slots + ((index >> shift) & RADIX_TREE_MAP_MASK));这条语句的意思了。首先这条语句的关键就是((index >> shift) & RADIX_TREE_MAP_MASK)寻找每层的下标。例如height为2,RADIX_TREE_MAP_SHIFT的值为6,那么一开始shift的值为6,那么(index>>6)&111111表示index右移6位,然后和111111按位与,就可以取得index的最高的六位的数字即第一层的下标了。当第二次循环的时候shift为0,此时index&111111就可以取得最底层的下标这样就可以找到相应的页了。
page_cache_get()函数增加该页的使用计数器。
2. 2 增加页
函数add_to_page_cache()把一个新页的描述符插入到页高速缓存。具体代码如下:
-
int add_to_page_cache(struct page *page, struct address_space *mapping,
-
pgoff_t offset, int gfp_mask)
-
{
-
int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
-
-
if (error == 0) {
-
write_lock_irq(&mapping->tree_lock);
-
error = radix_tree_insert(&mapping->page_tree, offset, page);
-
if (!error) {
-
page_cache_get(page);
-
SetPageLocked(page);
-
page->mapping = mapping;
-
page->index = offset;
-
mapping->nrpages++;
-
pagecache_acct(1);
-
}
-
write_unlock_irq(&mapping->tree_lock);
-
radix_tree_preload_end();
-
}
-
return error;
-
}
函数radix_tree_preload()函数禁止内核抢占,并把一些空的radix_tree_node结构赋给每cpu变量radix_tree_preloads.radix_tree_node结构的分配由slab分配器高速缓存radix_tree_node_cachep来完成。如果radix_tree_preload()预分配radix_tree_node结构不成功,函数add_to_page_cache就终止返回。否则,如果radix_tree_preload()成功地完成预分配,add_to_page_cache()函数肯定不会因为缺乏空闲内存或因为文件的大小达到了64GB而无法完成新页描述符的插入。
radix_tree_inser()在树中插入新节点,
-
/* Make sure the tree is high enough. */
-
if ((!index && !root->rnode) ||
-
index > radix_tree_maxindex(root->height)) {
-
error = radix_tree_extend(root, index);
-
if (error)
-
return error;
-
}
上面一段代码主要是判断要插入的页的index是否小于当前页的深度对应的最大索引值。若大于,则通过radix_tree_extend()扩展基树的深度,并分配radix_tree_node,而且将该radix_tree_node和原有的基树建立连接。新分配的radix_tree_node添加在radix_tree_root之下,所以radix_tree_node之上。代码如下:
-
do {
-
if (!(node = radix_tree_node_alloc(root)))
-
return -ENOMEM;
-
-
/* Increase the height. */
-
node->slots[0] = root->rnode;
-
-
/* Propagate the aggregated tag info into the new root */
-
for (tag = 0; tag < RADIX_TREE_TAGS; tag++) {
-
if (tags[tag])
-
tag_set(node, tag, 0);
-
}
-
-
node->count = 1;
-
root->rnode = node;
-
root->height++;
-
} while (height > root->height);
node->slots[0] = root->rnode;则是将原来的radix_tree_node的地址放在新分配的radix_tree_node的slots[0]中。
root->rnode = node;则是root的rnode指向新分配的radix_tree_node;
-
slot = &root->rnode;
-
height = root->height;
-
shift = (height-1) * RADIX_TREE_MAP_SHIFT;
-
-
offset = 0; /* uninitialised var warning */
-
while (height > 0) {
-
if (*slot == NULL) {
-
/* Have to add a child node. */
-
if (!(tmp = radix_tree_node_alloc(root)))
-
return -ENOMEM;
-
*slot = tmp;
-
if (node)
-
node->count++;
-
}
-
-
/* Go a level down */
-
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-
node = *slot;
-
slot = (struct radix_tree_node **)(node->slots + offset);
-
shift -= RADIX_TREE_MAP_SHIFT;
-
height--;
-
}
-
-
if (*slot != NULL)
-
return -EEXIST;
-
if (node) {
-
node->count++;
-
BUG_ON(tag_get(node, 0, offset));
-
BUG_ON(tag_get(node, 1, offset));
-
}
-
-
*slot = item;
-
return 0;
以下面的图形例子来对上面的函数进行说明:
slot = &root->rnode;使slot指向radix_tree_root的rnode。
height=2;
进入while循环:(此时还只有黑色线条部分)
*slot表示rnode中存储的地址,此时不为空,则执行下面语句段:以index=131为例,其二进制为:000010 000011
-
/* Go a level down */
-
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-
node = *slot;
-
slot = (struct radix_tree_node **)(node->slots + offset);
-
shift -= RADIX_TREE_MAP_SHIFT;
-
height--;
此时offset的值为:000010;
node = *slot;使node指向第一个radix_tree_node
slot = (struct radix_tree_node **)(node->slots + offset);使slot指向第一个radix_tree_node的slot[2];
再一次进入while循环:(实现上图中的绿色部分)
此时*slot==NULL,故进入下面的程序段:
-
if (*slot == NULL) {
-
/* Have to add a child node. */
-
if (!(tmp = radix_tree_node_alloc(root)))
-
return -ENOMEM;
-
*slot = tmp;
-
if (node)
-
node->count++;
-
}
-
radix_tree_node_alloc(root);分配一个radix_tree_node
*slot = tmp;将该radix_tree_node的地址放在slot中
然后执行:
-
/* Go a level down */
-
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-
node = *slot;
-
slot = (struct radix_tree_node **)(node->slots + offset);
-
shift -= RADIX_TREE_MAP_SHIFT;
-
height--;
最后执行:
-
if (*slot != NULL)
-
return -EEXIST;
-
if (node) {
-
node->count++;
-
BUG_ON(tag_get(node, 0, offset));
-
BUG_ON(tag_get(node, 1, offset));
-
}
-
-
*slot = item;
-
return 0;
此时node指向第二层那个radix_tree_node,
*slot = item;将要添加的page加入的地址赋给*slot;
2.3 删除页
函数remove_from_page_cache()实现从高速缓存中删除页描述符,代码如下:
-
void remove_from_page_cache(struct page *page)
-
{
-
struct address_space *mapping = page->mapping; //获得page对应的address_space对象
-
-
BUG_ON(!PageLocked(page));
-
-
write_lock_irq(&mapping->tree_lock);
-
__remove_from_page_cache(page);
-
write_unlock_irq(&mapping->tree_lock);
-
}
__remove_from_page_cache()代码如下:
-
void __remove_from_page_cache(struct page *page)
-
{
-
struct address_space *mapping = page->mapping;
-
-
radix_tree_delete(&mapping->page_tree, page->index);
-
page->mapping = NULL;
-
mapping->nrpages--;
-
pagecache_acct(-1);
-
}
-
page->mapping = NULL;把page的mapping字段置为NULL
-
mapping->nrpages--; 把所有者的页总数减1
radix_tree_delete()代码如下:
-
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
-
{
-
struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; //path为结构体数组,将结构体数组的首元的首地址赋值给pathp
-
struct radix_tree_path *orig_pathp;
-
unsigned int height, shift;
-
void *ret = NULL;
-
char tags[RADIX_TREE_TAGS];
-
int nr_cleared_tags;
-
-
height = root->height; //以height=2为例
-
if (index > radix_tree_maxindex(height)) //保证要删除的页索引小于当前深度所允许的最大索引值
-
goto out;
-
-
shift = (height - 1) * RADIX_TREE_MAP_SHIFT; //6
-
pathp->node = NULL; //初始条件以下图1中黑色线条表示
-
pathp->slot = &root->rnode;
-
-
while (height > 0) { //height=2以下图1中蓝色线条表示
-
int offset; //height=1以下图1中红色线条表示
-
-
if (*pathp->slot == NULL)
-
goto out;
-
-
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-
pathp[1].offset = offset;
-
pathp[1].node = *pathp[0].slot;
-
pathp[1].slot = (struct radix_tree_node **)
-
(pathp[1].node->slots + offset);
-
pathp++;
-
shift -= RADIX_TREE_MAP_SHIFT;
-
height--;
-
}
-
-
ret = *pathp[0].slot;
-
if (ret == NULL)
-
goto out;
-
-
orig_pathp = pathp;
-
-
/*
-
* Clear all tags associated with the just-deleted item
-
*/
-
memset(tags, 0, sizeof(tags)); //这里没太看懂
-
do {
-
int tag;
-
-
nr_cleared_tags = RADIX_TREE_TAGS;
-
for (tag = 0; tag < RADIX_TREE_TAGS; tag++) {
-
int idx;
-
-
if (tags[tag])
-
continue;
-
-
tag_clear(pathp[0].node, tag, pathp[0].offset);
-
-
for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
-
if (pathp[0].node->tags[tag][idx]) {
-
tags[tag] = 1;
-
nr_cleared_tags--;
-
break;
-
}
-
}
-
}
-
pathp--;
-
} while (pathp[0].node && nr_cleared_tags);
-
-
pathp = orig_pathp; //从最后一个节点开始,对路径数组中的节点开始循环操作。对每个节点,把指向
-
*pathp[0].slot = NULL; //下一个节点(或页描述符)位置数组的元素置为NULL,并递减count字段。如果
-
while (pathp[0].node && --pathp[0].node->count == 0) { //count变为0,就从树中删除节点并把radix_tree_node结构释放给slab分配器
-
pathp--; //高速缓存。然后继续循环处理路径数组中的节点
-
BUG_ON(*pathp[0].slot == NULL);
-
*pathp[0].slot = NULL;
-
radix_tree_node_free(pathp[1].node);
-
}
-
if (root->rnode == NULL)
-
root->height = 0;
-
out:
-
return ret; //返回已经从树中删除的页描述符指针
-
}
至此,完成了页的删除。
阅读(1777) | 评论(0) | 转发(1) |