Category: LINUX
2014-04-04 11:54:52
Original post: Linux Memory: The Slab Allocator (Part 6), by palals
kmem_cache_create
Creates a top-level cache node in the slab system. Right after creation the cache contains no slabs and no objects; a new slab is built only when an object is requested and the cache has no free objects left.
Parameters (a usage sketch follows the list):
1) name: name of the cache.
2) size: size of each object.
3) align: alignment requirement.
4) flags: cache flags.
5) ctor: pointer to the object constructor.
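Before walking through the implementation, here is a minimal usage sketch of this interface from a caller's point of view. The kmem_cache_* calls and <linux/slab.h> are the standard slab API; the object type, cache name and module wrapper are invented purely for illustration.

#include <linux/module.h>
#include <linux/slab.h>

/* Hypothetical object type managed by our own cache. */
struct my_obj {
	int id;
	char payload[120];
};

static struct kmem_cache *my_cache;

static int __init my_init(void)
{
	/* One dedicated cache for all my_obj allocations; no constructor. */
	my_cache = kmem_cache_create("my_obj_cache", sizeof(struct my_obj),
				     0, SLAB_HWCACHE_ALIGN, NULL);
	if (!my_cache)
		return -ENOMEM;
	return 0;
}

static void __exit my_exit(void)
{
	kmem_cache_destroy(my_cache);
}

/* Typical allocation/free pair somewhere in the driver. */
static struct my_obj *get_obj(void)
{
	return kmem_cache_alloc(my_cache, GFP_KERNEL);	/* may sleep */
}

static void put_obj(struct my_obj *p)
{
	kmem_cache_free(my_cache, p);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");

Note that creating the cache allocates no slabs; the first slab appears only when kmem_cache_alloc finds no free object, exactly as described above.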
struct kmem_cache *
kmem_cache_create (const char *name, size_t size, size_t align,
unsigned long flags, void (*ctor)(void *))
{
size_t left_over, slab_size, ralign;
struct kmem_cache *cachep = NULL, *pc;
gfp_t gfp;
/*
* Sanity checks... these are all serious usage bugs.
*/
/* Sanity checks */
if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
size > KMALLOC_MAX_SIZE) {
printk(KERN_ERR "%s: Early error in slab %s\n", __func__,
name);
BUG();
}
/*
* We use cache_chain_mutex to ensure a consistent view of
* cpu_online_mask as well. Please see cpuup_callback
*/
/* Has the slab allocator been initialized yet? During early boot only one CPU runs the slab initialization, so no locking is needed; afterwards the cache chain must be protected. */
if (slab_is_available()) {
get_online_cpus();
mutex_lock(&cache_chain_mutex);
}
/* Walk the cache chain and perform some validity checks */
list_for_each_entry(pc, &cache_chain, next) {
char tmp;
int res;
/*
* This happens when the module gets unloaded and doesn't
* destroy its slab cache and no-one else reuses the vmalloc
* area of the module. Print a warning.
*/
/* Check that every cache on the chain still has a valid name */
res = probe_kernel_address(pc->name, tmp);
if (res) {
printk(KERN_ERR
"SLAB: cache with size %d has lost its name\n",
pc->buffer_size);
continue;
}
/* Check whether a cache with the same name already exists on the chain */
if (!strcmp(pc->name, name)) {
printk(KERN_ERR
"kmem_cache_create: duplicate cache %s\n", name);
dump_stack();
goto oops;
}
}
……
/* Is the slab allocator fully up? */
if (slab_is_available())
gfp = GFP_KERNEL;
else
/* Before slab is fully initialized we must not block, and allocations can only come from low memory */
gfp = GFP_NOWAIT;
/* Get cache's description obj. */
/* Allocate the struct kmem_cache descriptor itself */
cachep = kmem_cache_zalloc(&cache_cache, gfp);
if (!cachep)
goto oops;
……
/*
* Determine if the slab management is 'on' or 'off' slab.
* (bootstrapping cannot cope with offslab caches so don't do
* it too early on. Always use on-slab management when
* SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)
*/
/* Decide whether the slab management structure lives on-slab or off-slab. As a rule, objects of PAGE_SIZE/8 (512 bytes with 4 KiB pages) or larger go off-slab. During bootstrap the on-slab form is always used; see kmem_cache_init for slab_early_init. */
if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&
!(flags & SLAB_NOLEAKTRACE))
/*
* Size is large, assume best to place the slab management obj
* off-slab (should allow better packing of objs).
*/
flags |= CFLGS_OFF_SLAB;
/* Round the object size up to the requested alignment */
size = ALIGN(size, align);
/* Compute the slab order and the leftover (fragment) size */
left_over = calculate_slab_order(cachep, size, align, flags);
/* cachep->num is the number of objects per slab in this cache; 0 means no usable layout was found and the cache cannot be created */
if (!cachep->num) {
printk(KERN_ERR
"kmem_cache_create: couldn't create cache %s.\n", name);
kmem_cache_free(&cache_cache, cachep);
cachep = NULL;
goto oops;
}
/* Size of the slab management structure: one struct slab plus the kmem_bufctl_t array */
slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
+ sizeof(struct slab), align);
/*
* If the slab has been placed off-slab, and we have enough space then
* move it on-slab. This is at the expense of any extra colouring.
*/
/* If this is an off-slab cache but the leftover space is large enough to hold the management structure, move it on-slab (at the cost of some colouring space) */
if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
/* Clear the off-slab flag */
flags &= ~CFLGS_OFF_SLAB;
/* Account for the space now taken by the management structure */
left_over -= slab_size;
}
if (flags & CFLGS_OFF_SLAB) {
/* really off slab. No need for manual alignment */
/* align applies to the slab objects themselves; when the management structure is stored off-slab it no longer affects where the objects land, so no manual alignment is needed */
slab_size =
cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
#ifdef CONFIG_PAGE_POISONING
/* If we're going to use the generic kernel_map_pages()
* poisoning, then it's going to smash the contents of
* the redzone and userword anyhow, so switch them off.
*/
if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
#endif
}
/* Unit size of a colouring block for this cache */
cachep->colour_off = cache_line_size();
/* Offset must be a multiple of the alignment. */
/* The colour offset must be a multiple of the object alignment */
if (cachep->colour_off < align)
cachep->colour_off = align;
/* How many colouring blocks fit into the leftover space */
cachep->colour = left_over / cachep->colour_off;
/* Size of the slab management structure */
cachep->slab_size = slab_size;
cachep->flags = flags;
cachep->gfpflags = 0;
if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
cachep->gfpflags |= GFP_DMA;
/* Size of each slab object */
cachep->buffer_size = size;
/* Used when computing an object's index within a slab; see obj_to_index */
cachep->reciprocal_buffer_size = reciprocal_value(size);
if (flags & CFLGS_OFF_SLAB) {
/* For off-slab caches, find the general (kmalloc) cache from which the slab management structures will be allocated and remember it in slabp_cache; for on-slab caches this pointer stays NULL */
cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
/*
* This is a possibility for one of the malloc_sizes caches.
* But since we go off slab only for object size greater than
* PAGE_SIZE/8, and malloc_sizes gets created in ascending order,
* this should not happen at all.
* But leave a BUG_ON for some lucky dude.
*/
BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
}
/* Object constructor */
cachep->ctor = ctor;
/* Cache name */
cachep->name = name;
/* Set up the per-CPU local caches */
if (setup_cpu_cache(cachep, gfp)) {
__kmem_cache_destroy(cachep);
cachep = NULL;
goto oops;
}
/* cache setup completed, link it into the list */
/* Cache set up; link it into the global slab cache chain */
list_add(&cachep->next, &cache_chain);
oops:
if (!cachep && (flags & SLAB_PANIC))
panic("kmem_cache_create(): failed to create slab `%s'\n",
name);
if (slab_is_available()) {
mutex_unlock(&cache_chain_mutex);
put_online_cpus();
}
return cachep;
}
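To make the management-size and colouring bookkeeping above concrete, here is a small user-space sketch of the same arithmetic. struct slab_hdr, the kmem_bufctl_t width and the sample numbers are stand-ins modelled on this kernel version, not the real definitions, and cache_line_size() is assumed to equal the requested alignment.

#include <stdio.h>
#include <stddef.h>

typedef unsigned int kmem_bufctl_t;	/* stand-in for the kernel type */

struct slab_hdr {			/* rough stand-in for struct slab */
	void *list[2];
	unsigned long colouroff;
	void *s_mem;
	unsigned int inuse;
	kmem_bufctl_t free;
};

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((size_t)(a) - 1))

int main(void)
{
	size_t align = 64;		/* assumed alignment / cache line size */
	unsigned int num = 26;		/* objects per slab (example value) */
	size_t left_over = 360;		/* leftover from calculate_slab_order */

	/* One struct slab plus one kmem_bufctl_t per object, rounded up. */
	size_t slab_size = ALIGN_UP(num * sizeof(kmem_bufctl_t) +
				    sizeof(struct slab_hdr), align);

	size_t colour_off = align;			/* at least the alignment */
	size_t colour = left_over / colour_off;		/* number of colour blocks */

	printf("slab mgmt size = %zu, colours = %zu\n", slab_size, colour);
	return 0;
}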
calculate_slab_order
Determines how many pages make up a slab (the gfp order) and how many objects fit in each slab; a simplified sketch of the order-selection loop follows the function.
static size_t calculate_slab_order(struct kmem_cache *cachep,
size_t size, size_t align, unsigned long flags)
{
unsigned long offslab_limit;
size_t left_over = 0;
int gfporder;
for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
unsigned int num;
size_t remainder;
/* Work out how many objects a slab of this order can hold */
cache_estimate(gfporder, size, align, flags, &remainder, &num);
/* Zero objects means not even one fits at this order; try the next order */
if (!num)
continue;
if (flags & CFLGS_OFF_SLAB) {
/*
* Max number of objs-per-slab for caches which
* use off-slab slabs. Needed to avoid a possible
* looping condition in cache_grow().
*/
/* An off-slab slab needs its management structure (a struct slab plus the kmem_bufctl_t array) allocated separately, and that allocation follows the normal object allocation path:
kmem_cache_alloc -> __cache_alloc -> __do_cache_alloc -> ____cache_alloc -> cache_alloc_refill -> cache_grow -> alloc_slabmgmt -> kmem_cache_alloc_node -> kmem_cache_alloc
So a loop is possible. The key is alloc_slabmgmt, which recurses whenever the management structure is itself stored off-slab. That only happens when the slab holds so many objects that the kmem_bufctl_t array, and hence the whole management structure, becomes large, so the number of kmem_bufctl_t entries must be bounded. The bound below is rough: since objects of this size are stored off-slab, assume the management structure is also of size size and compute how many kmem_bufctl_t entries would certainly push it off-slab. "Rough" because staying below the limit does not guarantee the management structure is on-slab; that is still harmless, since slab_break_gfp_order (normally 1, i.e. at most two pages per slab) already caps the slab size. Off-slab caches hold large objects (over 512 bytes), so a slab never contains many of them, the kmem_bufctl_t array stays small, and this rough check is sufficient.
*/
offslab_limit = size - sizeof(struct slab);
offslab_limit /= sizeof(kmem_bufctl_t);
/* The object count exceeds the limit: break out and do not try a larger order. The values saved on the previous iteration remain valid, so going around the loop once more did no harm. */
if (num > offslab_limit)
break;
}
/* Found something acceptable - save it away */
/* Objects per slab */
cachep->num = num;
/* Order of the slab, i.e. how many pages it spans */
cachep->gfporder = gfporder;
/* Leftover (fragment) space in the slab */
left_over = remainder;
/*
* A VFS-reclaimable slab tends to have most allocations
* as GFP_NOFS and we really don't want to have to be allocating
* higher-order pages when we are unable to shrink dcache.
*/
/* SLAB_RECLAIM_ACCOUNT marks the slab's pages as reclaimable: they are counted when the kernel checks whether enough pages are available to satisfy user-space demands, and kmem_freepages() can hand them back. Since the pages are reclaimable, the fragmentation checks below can be skipped. */
if (flags & SLAB_RECLAIM_ACCOUNT)
break;
/*
* Large number of objects is good, but very large slabs are
* currently bad for the gfp()s.
*/
/* slab_break_gfp_order is the threshold on a slab's page order: once it is reached, higher orders are not tried no matter how much space is wasted */
if (gfporder >= slab_break_gfp_order)
break;
/*
* Acceptable internal fragmentation?
*/
/* The slab is at least eight times larger than the wasted space, i.e. utilization is good enough, so accept this order */
if (left_over * 8 <= (PAGE_SIZE << gfporder))
break;
}
/* Return the leftover size */
return left_over;
}
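The loop's stopping rules are easier to see in isolation. The sketch below assumes 4 KiB pages, slab_break_gfp_order = 1 and a crude objects-per-slab estimate in place of cache_estimate, and it omits the off-slab and SLAB_RECLAIM_ACCOUNT cases.

#include <stdio.h>

#define PAGE_SIZE		4096UL
#define SLAB_BREAK_GFP_ORDER	1	/* assumed default threshold */
#define MAX_ORDER		11	/* illustrative upper bound */

/* Pick the smallest order that gives acceptable fragmentation. */
static int pick_order(unsigned long obj_size)
{
	int order;

	for (order = 0; order <= MAX_ORDER; order++) {
		unsigned long slab = PAGE_SIZE << order;
		unsigned long num  = slab / obj_size;		/* crude on-slab estimate */
		unsigned long left = slab - num * obj_size;	/* wasted space */

		if (!num)
			continue;		/* not even one object fits */
		if (order >= SLAB_BREAK_GFP_ORDER)
			return order;		/* never go above the break order */
		if (left * 8 <= slab)
			return order;		/* at most 1/8 wasted: accept */
	}
	return -1;
}

int main(void)
{
	printf("96-byte objects   -> order %d\n", pick_order(96));
	printf("1400-byte objects -> order %d\n", pick_order(1400));
	return 0;
}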
cache_estimate
Computes the number of objects that fit in each slab and the space left over (a user-space re-implementation sketch follows the function).
Parameters:
1) gfporder: the slab consists of 2^gfporder pages.
2) buffer_size: size of each object.
3) align: object alignment.
4) flags: whether the slab management is on-slab or off-slab.
5) left_over: returns the amount of wasted space in the slab.
6) num: returns the number of objects per slab.
static void cache_estimate(unsigned long gfporder, size_t buffer_size,
size_t align, int flags, size_t *left_over,
unsigned int *num)
{
int nr_objs;
size_t mgmt_size;
/* The slab spans 2^gfporder pages */
size_t slab_size = PAGE_SIZE << gfporder;
/*
* The slab management structure can be either off the slab or
* on it. For the latter case, the memory allocated for a
* slab is used for:
*
* - The struct slab
* - One kmem_bufctl_t for each object
* - Padding to respect alignment of @align
* - @buffer_size bytes for each object
*
* If the slab management structure is off the slab, then the
* alignment will already be calculated into the size. Because
* the slabs are all pages aligned, the objects will be at the
* correct alignment when allocated.
*/
if (flags & CFLGS_OFF_SLAB) {
/* Off-slab management */
mgmt_size = 0;
/* The slab pages hold no management structure; all of the space stores objects */
nr_objs = slab_size / buffer_size;
/* Cap the object count */
if (nr_objs > SLAB_LIMIT)
nr_objs = SLAB_LIMIT;
} else {
/*
* Ignore padding for the initial guess. The padding
* is at most @align-1 bytes, and @buffer_size is at
* least @align. In the worst case, this result will
* be one greater than the number of objects that fit
* into the memory allocation when taking the padding
* into account.
*/
/* On-slab management: the management structure shares the slab with the objects, so the slab pages hold one struct slab, a kmem_bufctl_t array (one entry per object) and the objects themselves */
nr_objs = (slab_size - sizeof(struct slab)) /
(buffer_size + sizeof(kmem_bufctl_t));
/*
* This calculated number will be either the right
* amount, or one greater than what we want.
*/
/* With the management area rounded up to the alignment, check whether everything still fits in the slab; if not, drop one object */
if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
> slab_size)
nr_objs--;
/* Cap the object count */
if (nr_objs > SLAB_LIMIT)
nr_objs = SLAB_LIMIT;
/* Final size of the aligned slab management area */
mgmt_size = slab_mgmt_size(nr_objs, align);
}
/* Report the object count */
*num = nr_objs;
/* Report the wasted space */
*left_over = slab_size - nr_objs*buffer_size - mgmt_size;
}
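As a concrete check of the arithmetic, the following stand-alone sketch mirrors the on-slab branch above. The struct slab size and kmem_bufctl_t width are stand-ins for illustration, not the real kernel values, and the SLAB_LIMIT clamp is omitted.

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE	4096UL
#define SLAB_HDR	40	/* stand-in for sizeof(struct slab) */
#define BUFCTL_SZ	4	/* stand-in for sizeof(kmem_bufctl_t) */

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((size_t)(a) - 1))

static size_t mgmt_size(size_t nr_objs, size_t align)
{
	return ALIGN_UP(SLAB_HDR + nr_objs * BUFCTL_SZ, align);
}

/* On-slab variant of cache_estimate: objects and management share the slab. */
static void estimate(unsigned int gfporder, size_t buffer_size, size_t align,
		     size_t *left_over, size_t *num)
{
	size_t slab_size = PAGE_SIZE << gfporder;

	/* First guess, ignoring the alignment padding of the management area. */
	size_t nr_objs = (slab_size - SLAB_HDR) / (buffer_size + BUFCTL_SZ);

	/* The guess can be one too high once the padding is accounted for. */
	if (mgmt_size(nr_objs, align) + nr_objs * buffer_size > slab_size)
		nr_objs--;

	*num = nr_objs;
	*left_over = slab_size - nr_objs * buffer_size - mgmt_size(nr_objs, align);
}

int main(void)
{
	size_t left, num;

	estimate(0, 256, 64, &left, &num);
	printf("256-byte objects, order 0: %zu objects, %zu bytes left over\n",
	       num, left);
	return 0;
}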
kmem_find_general_cachep
Looks up the general (kmalloc) cache that serves allocations of the given size; it simply calls __find_general_cachep. A toy version of the size-class lookup follows the function.
static inline struct kmem_cache *__find_general_cachep(size_t size,
gfp_t gfpflags)
{
struct cache_sizes *csizep = malloc_sizes;
#if DEBUG
/* This happens if someone tries to call
* kmem_cache_create(), or __kmalloc(), before
* the generic caches are initialized.
*/
BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
#endif
if (!size)
return ZERO_SIZE_PTR;
/* Find the first size class that is large enough */
while (size > csizep->cs_size)
csizep++;
/*
* Really subtle: The last entry with cs->cs_size==ULONG_MAX
* has cs_{dma,}cachep==NULL. Thus no special case
* for large kmalloc calls required.
*/
#ifdef CONFIG_ZONE_DMA
if (unlikely(gfpflags & GFP_DMA))
return csizep->cs_dmacachep;
#endif
/* Return the cache for that size class */
return csizep->cs_cachep;
}
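The lookup relies only on malloc_sizes being sorted in ascending order and terminated by a huge sentinel entry. A toy version of the same idea, with the sizes and names invented for illustration:

#include <stdio.h>
#include <limits.h>

struct size_class {
	unsigned long size;
	const char *name;
};

/* Ascending sizes terminated by a sentinel, like malloc_sizes. */
static const struct size_class classes[] = {
	{   32, "size-32"   },
	{   64, "size-64"   },
	{  128, "size-128"  },
	{ 4096, "size-4096" },
	{ ULONG_MAX, NULL   },	/* sentinel: no special case for huge sizes */
};

static const char *find_class(unsigned long size)
{
	const struct size_class *c = classes;

	while (size > c->size)	/* walk to the first class that is large enough */
		c++;
	return c->name;		/* NULL for oversized requests, as in the kernel */
}

int main(void)
{
	const char *big = find_class(5000);

	printf("100 bytes  -> %s\n", find_class(100));
	printf("5000 bytes -> %s\n", big ? big : "(none)");
	return 0;
}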
setup_cpu_cache
Sets up the per-CPU local caches and the three slab lists (struct kmem_list3).
static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
{
/* The general caches are fully initialized: set up a local cache for every CPU */
if (g_cpucache_up == FULL)
return enable_cpucache(cachep, gfp);
/* We are still in system initialization. g_cpucache_up tracks how far the general caches have come: PARTIAL_AC means the cache that holds struct array_cache exists, PARTIAL_L3 means the cache that holds struct kmem_list3 exists (note the order in which these two are created). During this phase only the boot CPU's local cache and slab lists need to be set up. */
if (g_cpucache_up == NONE) {
/*
* Note: the first kmem_cache_create must create the cache
* that's used by kmalloc(24), otherwise the creation of
* further caches will BUG().
*/
/* We get here while creating the cache that will hold struct array_cache itself; since that general cache does not exist yet, the statically allocated initarray_generic must serve as the local cache */
cachep->array[smp_processor_id()] = &initarray_generic.cache;
/*
* If the cache that's used by kmalloc(sizeof(kmem_list3)) is
* the first cache, then we need to set up all its list3s,
* otherwise the creation of further caches will BUG().
*/
/* The cache holding struct kmem_list3 is created after the one holding struct array_cache, so it cannot exist yet either; fall back to the static slab lists as well */
set_up_list3s(cachep, SIZE_AC);
/* At this point the cache for struct array_cache is in place. If struct kmem_list3 and struct array_cache live in the same general cache, it will not be created again, so the progress recorded in g_cpucache_up can move one step further */
if (INDEX_AC == INDEX_L3)
g_cpucache_up = PARTIAL_L3;
else
g_cpucache_up = PARTIAL_AC;
} else {
/* g_cpucache_up is at least PARTIAL_AC here: the general cache for struct array_cache exists, so it can be allocated with kmalloc */
cachep->array[smp_processor_id()] =
kmalloc(sizeof(struct arraycache_init), gfp);
if (g_cpucache_up == PARTIAL_AC) {
/* The cache for struct kmem_list3 is still not ready, so keep using the static slab lists */
set_up_list3s(cachep, SIZE_L3);
/* As kmem_cache_init (analyzed later) shows, this branch is only taken while creating the cache for struct kmem_list3; once the code above has run, that cache is usable, so advance g_cpucache_up */
g_cpucache_up = PARTIAL_L3;
} else {
/* Both the struct kmem_list3 cache and the struct array_cache cache are now available */
int node;
for_each_online_node(node) {
/* Allocate the struct kmem_list3 with kmalloc on the right node */
cachep->nodelists[node] =
kmalloc_node(sizeof(struct kmem_list3),
gfp, node);
BUG_ON(!cachep->nodelists[node]);
/* Initialize the three slab lists */
kmem_list3_init(cachep->nodelists[node]);
}
}
}
/* Set the next reap time */
cachep->nodelists[numa_mem_id()]->next_reap =
jiffies + REAPTIMEOUT_LIST3 +
((unsigned long)cachep) % REAPTIMEOUT_LIST3;
cpu_cache_get(cachep)->avail = 0;
cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
cpu_cache_get(cachep)->batchcount = 1;
cpu_cache_get(cachep)->touched = 0;
cachep->batchcount = 1;
cachep->limit = BOOT_CPUCACHE_ENTRIES;
return 0;
}
enable_cpucache
Enables the per-CPU local cache; the size-based tuning rule is restated in a small sketch after the listing.
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
int err;
int limit, shared;
/*
* The head array serves three purposes:
* - create a LIFO ordering, i.e. return objects that are cache-warm
* - reduce the number of spinlock operations.
* - reduce the number of linked list operations on the slab and
* bufctl chains: array operations are cheaper.
* The numbers are guessed, we should auto-tune as described by
* Bonwick.
*/
/* Choose the local-cache object limit based on the object size */
if (cachep->buffer_size > 131072)
limit = 1;
else if (cachep->buffer_size > PAGE_SIZE)
limit = 8;
else if (cachep->buffer_size > 1024)
limit = 24;
else if (cachep->buffer_size > 256)
limit = 54;
else
limit = 120;
/*
* CPU bound tasks (e.g. network routing) can exhibit cpu bound
* allocation behaviour: Most allocs on one cpu, most free operations
* on another cpu. For these cases, an efficient object passing between
* cpus is necessary. This is provided by a shared array. The array
* replaces Bonwick's magazine layer.
* On uniprocessor, it's functionally equivalent (but less efficient)
* to a larger limit. Thus disabled by default.
*/
shared = 0;
/* On SMP systems, set the number of objects in the shared local cache */
if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
shared = 8;
#if DEBUG
/*
* With debugging enabled, large batchcount lead to excessively long
* periods with disabled local interrupts. Limit the batchcount
*/
if (limit > 32)
limit = 32;
#endif
/* Apply the tuning */
err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
if (err)
printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
cachep->name, -err);
return err;
}
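The sizing policy is just a step function of the object size, with batchcount set to roughly half the limit. A stand-alone restatement, where the thresholds are copied from the function above and everything else (including skipping the DEBUG clamp) is a simplification:

#include <stdio.h>

/* Local-cache sizing as chosen by enable_cpucache (DEBUG clamp omitted). */
static void tune(unsigned long buffer_size, unsigned long page_size,
		 int *limit, int *batchcount)
{
	if (buffer_size > 131072)
		*limit = 1;
	else if (buffer_size > page_size)
		*limit = 8;
	else if (buffer_size > 1024)
		*limit = 24;
	else if (buffer_size > 256)
		*limit = 54;
	else
		*limit = 120;

	*batchcount = (*limit + 1) / 2;	/* move half a cache per refill/flush */
}

int main(void)
{
	int limit, batch;

	tune(192, 4096, &limit, &batch);
	printf("192-byte objects: limit=%d batchcount=%d\n", limit, batch);
	return 0;
}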
do_tune_cpucache
Configures the local caches, the shared local cache and the three slab lists.
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
int batchcount, int shared, gfp_t gfp)
{
struct ccupdate_struct *new;
int i;
new = kzalloc(sizeof(*new), gfp);
if (!new)
return -ENOMEM;
/* Allocate a new struct array_cache for every online CPU */
for_each_online_cpu(i) {
new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
batchcount, gfp);
if (!new->new[i]) {
for (i--; i >= 0; i--)
kfree(new->new[i]);
kfree(new);
return -ENOMEM;
}
}
new->cachep = cachep;
/* Swap the new struct array_cache objects in for the old ones. On systems with CPU hotplug, an offline CPU may not have had its local cache freed and may still reference the old one; see __kmem_cache_destroy. Re-configuring the local cache when the CPU comes back up does not help either. Consider this scenario: CPUs A and B exist; CPU B goes down and cache X is then destroyed. Because CPU B is down, its local cache in cache X is not freed. Later CPU B comes back up and the local caches of every cache on cache_chain are refreshed, but the cache X descriptor has already been returned to cache_cache, so its CPU B local cache is not touched. Still later a new cache is created and the old cache X descriptor is handed out again, with CPU B still pointing at the stale local cache, which therefore has to be replaced here.
*/
on_each_cpu(do_ccupdate_local, (void *)new, 1);
check_irq_on();
cachep->batchcount = batchcount;
cachep->limit = limit;
cachep->shared = shared;
/* Free the old local caches */
for_each_online_cpu(i) {
struct array_cache *ccold = new->new[i];
if (!ccold)
continue;
spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
/* Return the objects still held in the old local cache */
free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
/* Free the old struct array_cache itself */
kfree(ccold);
}
kfree(new);
/* Set up the shared local cache and the three slab lists */
return alloc_kmemlist(cachep, gfp);
}
alloc_arraycache
Allocates a struct array_cache; the single-allocation layout it uses is sketched after the listing.
static struct array_cache *alloc_arraycache(int node, int entries,
int batchcount, gfp_t gfp)
{
/* The entry array follows struct array_cache directly, so both are allocated in one request */
int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
struct array_cache *nc = NULL;
/* Allocate the local cache; kmalloc hands this out from a general cache */
nc = kmalloc_node(memsize, gfp, node);
/*
* The array_cache structures contain pointers to free object.
* However, when such objects are allocated or transfered to another
* cache the pointers are not cleared and they could be counted as
* valid references during a kmemleak scan. Therefore, kmemleak must
* not scan such objects.
*/
kmemleak_no_scan(nc);
/* Initialize the local cache */
if (nc) {
nc->avail = 0;
nc->limit = entries;
nc->batchcount = batchcount;
nc->touched = 0;
spin_lock_init(&nc->lock);
}
return nc;
}
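The single-allocation layout used here (a header followed directly by its entry[] array) is a common kernel pattern; a minimal user-space sketch of the same idea, with the spinlock omitted and the field names shortened:

#include <stdio.h>
#include <stdlib.h>

/* Same idea as struct array_cache: bookkeeping fields followed
 * immediately by the entry[] pointer array, in one allocation. */
struct arraycache {
	unsigned int avail;
	unsigned int limit;
	unsigned int batchcount;
	unsigned int touched;
	void *entry[];			/* flexible array member */
};

static struct arraycache *alloc_ac(unsigned int entries, unsigned int batchcount)
{
	size_t memsize = sizeof(struct arraycache) + sizeof(void *) * entries;
	struct arraycache *ac = malloc(memsize);

	if (ac) {
		ac->avail = 0;
		ac->limit = entries;
		ac->batchcount = batchcount;
		ac->touched = 0;
	}
	return ac;
}

int main(void)
{
	struct arraycache *ac = alloc_ac(120, 60);

	if (!ac)
		return 1;
	printf("one allocation holds the header plus %u entry slots\n", ac->limit);
	free(ac);
	return 0;
}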
do_ccupdate_local
Swaps in the new struct array_cache on each CPU.
static void do_ccupdate_local(void *info)
{
struct ccupdate_struct *new = info;
struct array_cache *old;
check_irq_off();
old = cpu_cache_get(new->cachep);
/* Point this CPU at the new struct array_cache */
new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
/* Hand the old one back so the caller can free it */
new->new[smp_processor_id()] = old;
}
alloc_kmemlist
Initializes the shared local cache and the three slab lists; right after initialization the lists contain no slabs.
static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
{
int node;
struct kmem_list3 *l3;
struct array_cache *new_shared;
struct array_cache **new_alien = NULL;
for_each_online_node(node) {
/* NUMA-related */
if (use_alien_caches) {
new_alien = alloc_alien_cache(node, cachep->limit, gfp);
if (!new_alien)
goto fail;
}
new_shared = NULL;
if (cachep->shared) {
/* Allocate the shared local cache */
new_shared = alloc_arraycache(node,
cachep->shared*cachep->batchcount,
0xbaadf00d, gfp);
if (!new_shared) {
free_alien_cache(new_alien);
goto fail;
}
}
/* Fetch the node's existing slab lists, if any */
l3 = cachep->nodelists[node];
if (l3) {
/* The old slab-list structure exists, so its resources must be released first */
struct array_cache *shared = l3->shared;
spin_lock_irq(&l3->list_lock);
/* Return the objects held in the old shared local cache */
if (shared)
free_block(cachep, shared->entry,
shared->avail, node);
/* Install the new shared local cache */
l3->shared = new_shared;
if (!l3->alien) {
l3->alien = new_alien;
new_alien = NULL;
}
/* Recompute the per-node limit on free objects */
l3->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
spin_unlock_irq(&l3->list_lock);
/* Free the old shared local cache's struct array_cache */
kfree(shared);
/* NUMA-related */
free_alien_cache(new_alien);
continue;
}
/* No lists on this node yet: allocate a new struct kmem_list3 */
l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
if (!l3) {
free_alien_cache(new_alien);
kfree(new_shared);
goto fail;
}
/* Initialize the three slab lists */
kmem_list3_init(l3);
l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
((unsigned long)cachep) % REAPTIMEOUT_LIST3;
l3->shared = new_shared;
l3->alien = new_alien;
l3->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
cachep->nodelists[node] = l3;
}
return 0;
fail:
if (!cachep->next.next) {
/* Cache is not active yet. Roll back what we did */
node--;
while (node >= 0) {
if (cachep->nodelists[node]) {
l3 = cachep->nodelists[node];
kfree(l3->shared);
free_alien_cache(l3->alien);
kfree(l3);
cachep->nodelists[node] = NULL;
}
node--;
}
}
return -ENOMEM;
}
set_up_list3s
Points the cache's slab lists at the statically allocated bootstrap structures.
static void __init set_up_list3s(struct kmem_cache *cachep, int index)
{
int node;
/* On UMA there is only one node */
for_each_online_node(node) {
/* The global initkmem_list3 array provides the slab lists during bootstrap */
cachep->nodelists[node] = &initkmem_list3[index + node];
/* Set the next reap time */
cachep->nodelists[node]->next_reap = jiffies +
REAPTIMEOUT_LIST3 +
((unsigned long)cachep) % REAPTIMEOUT_LIST3;
}
}