
Category: LINUX

2011-03-03 16:50:32

The previous post covered the slab-level code. This one moves a level up to the cache-level code, starting with cache creation.
 
kmem_cache_create

Creates a top-level cache node of the slab system. Right after creation the cache contains no slabs and no objects; a new slab is built only when an object is requested and the cache has no free objects left.

Parameters:

1)        name: the name of the cache.
2)        size: the size of each object.
3)        align: the alignment requirement.
4)        flags: flags.
5)        ctor: pointer to a constructor function.
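Before walking through the source, a minimal usage sketch (the object type, cache name and variable names here are made up for illustration):

struct my_obj {                 /* hypothetical object type */
        int a, b;
};
static struct kmem_cache *my_cachep;

/* typically done from module/subsystem init code */
my_cachep = kmem_cache_create("my_obj_cache", sizeof(struct my_obj),
                              0, SLAB_HWCACHE_ALIGN, NULL);
if (my_cachep) {
        /* the first allocation is what actually creates a slab */
        struct my_obj *p = kmem_cache_alloc(my_cachep, GFP_KERNEL);
        if (p)
                kmem_cache_free(my_cachep, p);
}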

 

struct kmem_cache *
kmem_cache_create (const char *name, size_t size, size_t align,
        unsigned long flags, void (*ctor)(void *))
{
        size_t left_over, slab_size, ralign;
        struct kmem_cache *cachep = NULL, *pc;
        gfp_t gfp;

        /*
         * Sanity checks... these are all serious usage bugs.
         */
        if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
            size > KMALLOC_MAX_SIZE) {
                printk(KERN_ERR "%s: Early error in slab %s\n", __func__,
                                name);
                BUG();
        }

        /*
         * We use cache_chain_mutex to ensure a consistent view of
         * cpu_online_mask as well.  Please see cpuup_callback
         */
        /* Check whether the slab allocator is up yet. During kernel boot a
         * single cpu performs the slab initialization, so no locking is
         * needed; afterwards the mutex must be taken. */
        if (slab_is_available()) {
                get_online_cpus();
                mutex_lock(&cache_chain_mutex);
        }

        /* Walk the cache chain and do some sanity checking */
        list_for_each_entry(pc, &cache_chain, next) {
                char tmp;
                int res;

                /*
                 * This happens when the module gets unloaded and doesn't
                 * destroy its slab cache and no-one else reuses the vmalloc
                 * area of the module.  Print a warning.
                 */
                /* Verify that every cache on the chain still has a readable name */
                res = probe_kernel_address(pc->name, tmp);
                if (res) {
                        printk(KERN_ERR
                               "SLAB: cache with size %d has lost its name\n",
                               pc->buffer_size);
                        continue;
                }
                /* Reject a cache whose name is already on the chain */
                if (!strcmp(pc->name, name)) {
                        printk(KERN_ERR
                               "kmem_cache_create: duplicate cache %s\n", name);
                        dump_stack();
                        goto oops;
                }
        }
……

        /* Is the slab allocator ready? */
        if (slab_is_available())
                gfp = GFP_KERNEL;
        else
                /* Until slab initialization finishes, blocking is forbidden
                 * and allocations may only come from low memory. */
                gfp = GFP_NOWAIT;

        /* Get cache's description obj. */
        /* Allocate the struct kmem_cache object itself, from cache_cache */
        cachep = kmem_cache_zalloc(&cache_cache, gfp);
        if (!cachep)
                goto oops;
……
        /*
         * Determine if the slab management is 'on' or 'off' slab.
         * (bootstrapping cannot cope with offslab caches so don't do
         * it too early on. Always use on-slab management when
         * SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)
         */
        /* Decide whether the slab management structure is stored on-slab or
         * off-slab. With 4KB pages, PAGE_SIZE >> 3 is 512, so objects of 512
         * bytes and up go off-slab. Bootstrap always uses on-slab management;
         * see slab_early_init in kmem_cache_init. */
        if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&
            !(flags & SLAB_NOLEAKTRACE))
                /*
                 * Size is large, assume best to place the slab management obj
                 * off-slab (should allow better packing of objs).
                 */
                flags |= CFLGS_OFF_SLAB;
        /* Round the object size up to the alignment */
        size = ALIGN(size, align);
        /* Compute the leftover (fragment) space per slab */
        left_over = calculate_slab_order(cachep, size, align, flags);
        /* cachep->num is the number of objects per slab in this cache; zero
         * means no usable geometry was found and creation fails. */
        if (!cachep->num) {
                printk(KERN_ERR
                       "kmem_cache_create: couldn't create cache %s.\n", name);
                kmem_cache_free(&cache_cache, cachep);
                cachep = NULL;
                goto oops;
        }
        /* Size of the slab management structure: one struct slab plus the
         * kmem_bufctl_t array */
        slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
                          + sizeof(struct slab), align);

        /*
         * If the slab has been placed off-slab, and we have enough space then
         * move it on-slab. This is at the expense of any extra colouring.
         */
        /* If this is an off-slab cache but the leftover space can hold the
         * management structure, convert it back to an on-slab cache. */
        if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
                /* Clear the off-slab flag */
                flags &= ~CFLGS_OFF_SLAB;
                /* Update the leftover size */
                left_over -= slab_size;
        }

        if (flags & CFLGS_OFF_SLAB) {
                /* really off slab. No need for manual alignment */
                /* align constrains the slab objects. An off-slab management
                 * structure cannot shift the objects' positions the way an
                 * on-slab one does, so it needs no alignment of its own. */
                slab_size =
                    cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);

#ifdef CONFIG_PAGE_POISONING
                /* If we're going to use the generic kernel_map_pages()
                 * poisoning, then it's going to smash the contents of
                 * the redzone and userword anyhow, so switch them off.
                 */
                if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
                        flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
#endif
        }
        /* Unit size of the cache's colour (offset) blocks */
        cachep->colour_off = cache_line_size();
        /* Offset must be a multiple of the alignment. */
        if (cachep->colour_off < align)
                cachep->colour_off = align;
        /* Number of colour blocks the leftover space provides */
        cachep->colour = left_over / cachep->colour_off;
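        /* Illustration with assumed numbers: if left_over = 984 and
         * cache_line_size() = 64, then colour_off = 64 and
         * colour = 984 / 64 = 15, so successive slabs start their objects at
         * offsets 0, 64, 128, ... cycling through 15 colours and spreading
         * different slabs' objects across different cache lines. */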

        /* Size of the slab management structure */
        cachep->slab_size = slab_size;
        cachep->flags = flags;
        cachep->gfpflags = 0;
        if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
                cachep->gfpflags |= GFP_DMA;
        /* Object size */
        cachep->buffer_size = size;
        /* Used when computing an object's index within a slab; see
         * obj_to_index */
        cachep->reciprocal_buffer_size = reciprocal_value(size);
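        /* Illustration (assumed size): for size = 256, reciprocal_value(256)
         * stores roughly 2^32 / 256, letting obj_to_index compute
         * offset / size as (u32)(((u64)offset * reciprocal) >> 32), that is,
         * a multiply and a shift instead of a division on every lookup. */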

 

        if (flags & CFLGS_OFF_SLAB) {
                /* Look up the general cache that the slab management
                 * structures will be allocated from and remember it in
                 * slabp_cache; for on-slab caches this pointer stays NULL. */
                cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
                /*
                 * This is a possibility for one of the malloc_sizes caches.
                 * But since we go off slab only for object size greater than
                 * PAGE_SIZE/8, and malloc_sizes gets created in ascending order,
                 * this should not happen at all.
                 * But leave a BUG_ON for some lucky dude.
                 */
                BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
        }
        /* Object constructor */
        cachep->ctor = ctor;
        /* Cache name */
        cachep->name = name;
        /* Set up the per-cpu local caches */
        if (setup_cpu_cache(cachep, gfp)) {
                __kmem_cache_destroy(cachep);
                cachep = NULL;
                goto oops;
        }

        /* cache setup completed, link it into the list */
        list_add(&cachep->next, &cache_chain);
oops:
        if (!cachep && (flags & SLAB_PANIC))
                panic("kmem_cache_create(): failed to create slab `%s'\n",
                      name);
        if (slab_is_available()) {
                mutex_unlock(&cache_chain_mutex);
                put_online_cpus();
        }
        return cachep;
}

 

calculate_slab_order

Computes how many pages make up one slab (its order) and how many objects each slab holds.

static size_t calculate_slab_order(struct kmem_cache *cachep,
                        size_t size, size_t align, unsigned long flags)
{
        unsigned long offslab_limit;
        size_t left_over = 0;
        int gfporder;

        for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
                unsigned int num;
                size_t remainder;
                /* Compute the number of objects per slab at this order */
                cache_estimate(gfporder, size, align, flags, &remainder, &num);
                /* Zero means not even one object fits at this order; try the
                 * next one */
                if (!num)
                        continue;

                if (flags & CFLGS_OFF_SLAB) {
                        /*
                         * Max number of objs-per-slab for caches which
                         * use off-slab slabs. Needed to avoid a possible
                         * looping condition in cache_grow().
                         */
                        /* For an off-slab cache the management structure (a
                         * struct slab plus the kmem_bufctl_t array) is itself
                         * allocated through the normal object path:
                         * kmem_cache_alloc -> __cache_alloc -> __do_cache_alloc
                         *   -> ____cache_alloc -> cache_alloc_refill
                         *   -> cache_grow -> alloc_slabmgmt
                         *   -> kmem_cache_alloc_node -> kmem_cache_alloc
                         * so a loop is possible. It closes in alloc_slabmgmt
                         * when the management structure is itself off-slab,
                         * which happens only when the slab holds so many
                         * objects that the kmem_bufctl_t array makes the whole
                         * management structure large. The object count must
                         * therefore be bounded. The bound below is rough:
                         * objects of this size already live off-slab, so
                         * pretend the management structure is also of size
                         * 'size' and compute how many kmem_bufctl_t entries
                         * would fit; an array that big would certainly push
                         * the management structure off-slab. It is rough
                         * because a smaller array does not guarantee on-slab
                         * management, but that does no harm:
                         * slab_break_gfp_order caps the pages per slab
                         * (usually 1, so at most two pages), and off-slab
                         * objects are at least 512 bytes, so a slab never
                         * holds many of them and the kmem_bufctl_t array
                         * stays small. A rough bound suffices. */
                        offslab_limit = size - sizeof(struct slab);
                        offslab_limit /= sizeof(kmem_bufctl_t);
                        /* Too many objects: stop instead of probing larger
                         * orders, so slabs do not hold excessive object
                         * counts. The values saved on the previous iteration
                         * remain valid; one wasted iteration is harmless. */
                        if (num > offslab_limit)
                                break;
                }

                /* Found something acceptable - save it away */
                /* Objects per slab */
                cachep->num = num;
                /* The slab's order, i.e. how many pages it spans */
                cachep->gfporder = gfporder;
                /* Leftover (fragment) space in the slab */
                left_over = remainder;

                /*
                 * A VFS-reclaimable slab tends to have most allocations
                 * as GFP_NOFS and we really don't want to have to be allocating
                 * higher-order pages when we are unable to shrink dcache.
                 */
                /* SLAB_RECLAIM_ACCOUNT marks this slab's pages as reclaimable:
                 * they are counted when the kernel checks whether enough pages
                 * exist to satisfy user-space demand, and kmem_freepages() can
                 * hand the page frames back. Being reclaimable, they skip the
                 * fragmentation checks below. */
                if (flags & SLAB_RECLAIM_ACCOUNT)
                        break;

                /*
                 * Large number of objects is good, but very large slabs are
                 * currently bad for the gfp()s.
                 */
                /* slab_break_gfp_order is the threshold on pages per slab:
                 * once the order reaches it, higher orders are not probed no
                 * matter how much space is wasted. */
                if (gfporder >= slab_break_gfp_order)
                        break;

                /*
                 * Acceptable internal fragmentation?
                 */
                /* The slab is at least 8 times the leftover, i.e. utilization
                 * is high enough; accept this order. */
                if (left_over * 8 <= (PAGE_SIZE << gfporder))
                        break;
        }
        /* Return the leftover size */
        return left_over;
}
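A worked example with assumed sizes (32-bit build, 4KB pages, sizeof(struct slab) = 28, sizeof(kmem_bufctl_t) = 4): for an on-slab cache with size = 1024 and align = 4, order 0 gives num = (4096 - 28) / (1024 + 4) = 3 and left_over = 4096 - 3*1024 - 40 = 984; since 984 * 8 > 4096 the fragmentation test fails and order 1 is probed, giving num = 7 and left_over = 8192 - 7*1024 - 56 = 968. The loop then stops because gfporder has reached slab_break_gfp_order (usually 1, as noted above), so this cache uses two-page slabs holding 7 objects each.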

 

cache_estimate

Computes the number of objects in each slab.

Parameters:

1)        gfporder: the slab consists of 2^gfporder pages.
2)        buffer_size: the size of each object.
3)        align: the object alignment.
4)        flags: on-slab or off-slab.
5)        left_over: output, the wasted (leftover) space in the slab.
6)        num: output, the number of objects per slab.

 

static void cache_estimate(unsigned long gfporder, size_t buffer_size,
                           size_t align, int flags, size_t *left_over,
                           unsigned int *num)
{
        int nr_objs;
        size_t mgmt_size;
        /* A slab spans 2^gfporder pages */
        size_t slab_size = PAGE_SIZE << gfporder;

        /*
         * The slab management structure can be either off the slab or
         * on it. For the latter case, the memory allocated for a
         * slab is used for:
         *
         * - The struct slab
         * - One kmem_bufctl_t for each object
         * - Padding to respect alignment of @align
         * - @buffer_size bytes for each object
         *
         * If the slab management structure is off the slab, then the
         * alignment will already be calculated into the size. Because
         * the slabs are all pages aligned, the objects will be at the
         * correct alignment when allocated.
         */
        if (flags & CFLGS_OFF_SLAB) {
                /* Off-slab: no management structure inside the slab */
                mgmt_size = 0;
                /* The whole slab stores objects */
                nr_objs = slab_size / buffer_size;
                /* The object count may not exceed the limit */
                if (nr_objs > SLAB_LIMIT)
                        nr_objs = SLAB_LIMIT;
        } else {
                /*
                 * Ignore padding for the initial guess. The padding
                 * is at most @align-1 bytes, and @buffer_size is at
                 * least @align. In the worst case, this result will
                 * be one greater than the number of objects that fit
                 * into the memory allocation when taking the padding
                 * into account.
                 */
                /* On-slab: the management structure sits next to the objects,
                 * so the slab pages hold one struct slab, a kmem_bufctl_t
                 * array with one entry per object, and the objects. */
                nr_objs = (slab_size - sizeof(struct slab)) /
                          (buffer_size + sizeof(kmem_bufctl_t));

                /*
                 * This calculated number will be either the right
                 * amount, or one greater than what we want.
                 */
                /* If the aligned management structure plus the objects exceed
                 * the slab, drop one object */
                if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
                       > slab_size)
                        nr_objs--;
                /* The object count may not exceed the limit */
                if (nr_objs > SLAB_LIMIT)
                        nr_objs = SLAB_LIMIT;
                /* Size of the management structure after alignment */
                mgmt_size = slab_mgmt_size(nr_objs, align);
        }
        /* Report the object count */
        *num = nr_objs;
        /* Compute the wasted space */
        *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
}
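For example (assumed numbers, 4KB pages): an off-slab cache with buffer_size = 2048 at gfporder = 0 gets nr_objs = 4096 / 2048 = 2 with mgmt_size = 0, so *left_over = 0; the struct slab and kmem_bufctl_t array are accounted for in whatever general cache they are allocated from instead.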

 

kmem_find_general_cachep

Finds the general (kmalloc size-class) cache suitable for objects of the given size; it simply calls __find_general_cachep.

 

static inline struct kmem_cache *__find_general_cachep(size_t size,
                                                       gfp_t gfpflags)
{
        struct cache_sizes *csizep = malloc_sizes;

#if DEBUG
        /* This happens if someone tries to call
         * kmem_cache_create(), or __kmalloc(), before
         * the generic caches are initialized.
         */
        BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
#endif
        if (!size)
                return ZERO_SIZE_PTR;
        /* Find the smallest size class that fits */
        while (size > csizep->cs_size)
                csizep++;

        /*
         * Really subtle: The last entry with cs->cs_size==ULONG_MAX
         * has cs_{dma,}cachep==NULL. Thus no special case
         * for large kmalloc calls required.
         */
#ifdef CONFIG_ZONE_DMA
        if (unlikely(gfpflags & GFP_DMA))
                return csizep->cs_dmacachep;
#endif
        /* Return the cache for this size class */
        return csizep->cs_cachep;
}
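For reference, malloc_sizes is an ascending array of size classes terminated by a ULONG_MAX sentinel; roughly, per slab_def.h of this kernel generation:

struct cache_sizes {
        size_t cs_size;
        struct kmem_cache *cs_cachep;
#ifdef CONFIG_ZONE_DMA
        struct kmem_cache *cs_dmacachep;
#endif
};

The while loop above therefore stops at the first class large enough for the request, and an oversized request falls through to the sentinel entry, whose cache pointers are NULL.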

 

setup_cpu_cache

Configures the local caches and the slab lists (the three full/partial/free lists in struct kmem_list3, called the "slab three-lists" below).

static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
{
        /* The general caches are fully up: just configure each cpu's local
         * cache */
        if (g_cpucache_up == FULL)
                return enable_cpucache(cachep, gfp);
        /* Still in system initialization. g_cpucache_up records how far the
         * general caches have come: PARTIAL_AC means the cache backing
         * struct array_cache exists, PARTIAL_L3 means the cache backing
         * struct kmem_list3 exists (note the order in which the two are
         * created). During this phase only the booting cpu's local cache and
         * slab three-lists need to be configured. */
        if (g_cpucache_up == NONE) {
                /*
                 * Note: the first kmem_cache_create must create the cache
                 * that's used by kmalloc(24), otherwise the creation of
                 * further caches will BUG().
                 */
                /* Taken while creating the cache for struct array_cache
                 * itself: that general cache does not exist yet, so the
                 * statically allocated local cache initarray_generic is used
                 * instead. */
                cachep->array[smp_processor_id()] = &initarray_generic.cache;

                /*
                 * If the cache that's used by kmalloc(sizeof(kmem_list3)) is
                 * the first cache, then we need to set up all its list3s,
                 * otherwise the creation of further caches will BUG().
                 */
                /* The struct kmem_list3 cache is created after the struct
                 * array_cache one, so it certainly does not exist yet either;
                 * fall back to the static global lists. */
                set_up_list3s(cachep, SIZE_AC);
                /* At this point the struct array_cache cache is in place. If
                 * struct kmem_list3 lives in the same general cache it will
                 * not be created separately, so the progress marker can move
                 * straight to PARTIAL_L3. */
                if (INDEX_AC == INDEX_L3)
                        g_cpucache_up = PARTIAL_L3;
                else
                        g_cpucache_up = PARTIAL_AC;
        } else {
                /* Reached when g_cpucache_up is at least PARTIAL_AC: the
                 * general cache for struct array_cache exists and kmalloc can
                 * supply one. */
                cachep->array[smp_processor_id()] =
                        kmalloc(sizeof(struct arraycache_init), gfp);
                if (g_cpucache_up == PARTIAL_AC) {
                        /* The struct kmem_list3 cache is still missing; keep
                         * using the static global lists */
                        set_up_list3s(cachep, SIZE_L3);
                        /* As kmem_cache_init (analyzed later) shows, only the
                         * creation of the struct kmem_list3 cache enters this
                         * branch; after the line above that cache is usable,
                         * so advance g_cpucache_up. */
                        g_cpucache_up = PARTIAL_L3;
                } else {
                        /* Both the struct kmem_list3 and struct array_cache
                         * caches exist */
                        int node;
                        for_each_online_node(node) {
                                /* kmalloc the struct kmem_list3 objects */
                                cachep->nodelists[node] =
                                    kmalloc_node(sizeof(struct kmem_list3),
                                                 gfp, node);
                                BUG_ON(!cachep->nodelists[node]);
                                /* Initialize the slab three-lists */
                                kmem_list3_init(cachep->nodelists[node]);
                        }
                }
        }
        /* Set the reap time */
        cachep->nodelists[numa_mem_id()]->next_reap =
                        jiffies + REAPTIMEOUT_LIST3 +
                        ((unsigned long)cachep) % REAPTIMEOUT_LIST3;

        cpu_cache_get(cachep)->avail = 0;
        cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
        cpu_cache_get(cachep)->batchcount = 1;
        cpu_cache_get(cachep)->touched = 0;
        cachep->batchcount = 1;
        cachep->limit = BOOT_CPUCACHE_ENTRIES;
        return 0;
}
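For orientation, g_cpucache_up is just a progress marker; in this generation of mm/slab.c it looks roughly like:

static enum {
        NONE,           /* no general cache usable yet */
        PARTIAL_AC,     /* the cache backing struct array_cache exists */
        PARTIAL_L3,     /* the cache backing struct kmem_list3 also exists */
        EARLY,          /* present in some versions of this code */
        FULL            /* all general caches up; enable_cpucache can run */
} g_cpucache_up;

Only NONE, PARTIAL_AC, PARTIAL_L3 and FULL are consulted in the function above.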

 

enable_cpucache

Enables the local caches (used once the general caches are fully initialized).

static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
        int err;
        int limit, shared;

        /*
         * The head array serves three purposes:
         * - create a LIFO ordering, i.e. return objects that are cache-warm
         * - reduce the number of spinlock operations.
         * - reduce the number of linked list operations on the slab and
         *   bufctl chains: array operations are cheaper.
         * The numbers are guessed, we should auto-tune as described by
         * Bonwick.
         */
        /* Choose the local cache capacity from the object size */
        if (cachep->buffer_size > 131072)
                limit = 1;
        else if (cachep->buffer_size > PAGE_SIZE)
                limit = 8;
        else if (cachep->buffer_size > 1024)
                limit = 24;
        else if (cachep->buffer_size > 256)
                limit = 54;
        else
                limit = 120;

        /*
         * CPU bound tasks (e.g. network routing) can exhibit cpu bound
         * allocation behaviour: Most allocs on one cpu, most free operations
         * on another cpu. For these cases, an efficient object passing between
         * cpus is necessary. This is provided by a shared array. The array
         * replaces Bonwick's magazine layer.
         * On uniprocessor, it's functionally equivalent (but less efficient)
         * to a larger limit. Thus disabled by default.
         */
        shared = 0;
        /* On multi-core systems, give the shared local cache 8 entries */
        if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
                shared = 8;

#if DEBUG
        /*
         * With debugging enabled, large batchcount lead to excessively long
         * periods with disabled local interrupts. Limit the batchcount
         */
        if (limit > 32)
                limit = 32;
#endif
        /* Configure the local caches */
        err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
        if (err)
                printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
                       cachep->name, -err);
        return err;
}

 

do_tune_cpucache

Configures the local caches, the shared local cache, and the slab three-lists. It passes the per-cpu replacement pointers around in a small helper struct, shown below.
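Roughly, per mm/slab.c of this kernel generation, the helper carries one replacement pointer per cpu:

struct ccupdate_struct {
        struct kmem_cache *cachep;
        struct array_cache *new[NR_CPUS];
};

on_each_cpu(do_ccupdate_local, new, 1) then lets each cpu swap its own array pointer, so no cpu ever writes another cpu's local cache.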

static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
                            int batchcount, int shared, gfp_t gfp)
{
        struct ccupdate_struct *new;
        int i;

        new = kzalloc(sizeof(*new), gfp);
        if (!new)
                return -ENOMEM;
        /* Allocate a new struct array_cache for each online cpu */
        for_each_online_cpu(i) {
                new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
                                               batchcount, gfp);
                if (!new->new[i]) {
                        for (i--; i >= 0; i--)
                                kfree(new->new[i]);
                        kfree(new);
                        return -ENOMEM;
                }
        }
        new->cachep = cachep;
        /* Swap the new struct array_cache objects in for the old ones. On a
         * cpu-hotplug system an offline cpu may keep its old local cache (see
         * __kmem_cache_destroy), and re-running this setup when the cpu comes
         * back up does not help. Consider cpus A and B: B goes down, cache X
         * is destroyed, and because B is offline its local cache for X is not
         * freed. Later B comes up and the local caches of every cache on
         * cache_chain are refreshed, but the kmem_cache object of X has
         * already been returned to cache_cache, so its cpu-B local cache is
         * left stale. Still later that object is handed out for a new cache,
         * cpu B's local cache is the old one, and it must be replaced here. */
        on_each_cpu(do_ccupdate_local, (void *)new, 1);

        check_irq_on();
        cachep->batchcount = batchcount;
        cachep->limit = limit;
        cachep->shared = shared;
        /* Free the old local caches */
        for_each_online_cpu(i) {
                struct array_cache *ccold = new->new[i];
                if (!ccold)
                        continue;
                spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
                /* Flush the objects held in the old local cache */
                free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
                spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
                /* Free the old struct array_cache itself */
                kfree(ccold);
        }
        kfree(new);
        /* Set up the shared local cache and the slab three-lists */
        return alloc_kmemlist(cachep, gfp);
}

 

alloc_arraycache

Allocates and initializes a struct array_cache object.

static struct array_cache *alloc_arraycache(int node, int entries,
                                            int batchcount, gfp_t gfp)
{
        /* The entry array of object pointers sits right behind the
         * struct array_cache header, so both are allocated in one chunk */
        int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
        struct array_cache *nc = NULL;
        /* Allocate the local cache; kmalloc takes it from a general cache */
        nc = kmalloc_node(memsize, gfp, node);
        /*
         * The array_cache structures contain pointers to free object.
         * However, when such objects are allocated or transfered to another
         * cache the pointers are not cleared and they could be counted as
         * valid references during a kmemleak scan. Therefore, kmemleak must
         * not scan such objects.
         */
        kmemleak_no_scan(nc);
        /* Initialize the local cache */
        if (nc) {
                nc->avail = 0;
                nc->limit = entries;
                nc->batchcount = batchcount;
                nc->touched = 0;
                spin_lock_init(&nc->lock);
        }
        return nc;
}
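The layout being sized here is, roughly per mm/slab.c of this era, a header followed by a flexible array of object pointers:

struct array_cache {
        unsigned int avail;      /* free objects currently cached */
        unsigned int limit;      /* capacity of entry[] */
        unsigned int batchcount; /* objects moved per refill/flush */
        unsigned int touched;    /* set when the cache was recently used */
        spinlock_t lock;
        void *entry[];           /* the cached object pointers */
};

which is why memsize above is sizeof(struct array_cache) plus entries pointer slots.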

 

do_ccupdate_local

Runs on each cpu to swap in that cpu's new struct array_cache.

static void do_ccupdate_local(void *info)
{
        struct ccupdate_struct *new = info;
        struct array_cache *old;

        check_irq_off();
        old = cpu_cache_get(new->cachep);
        /* Point this cpu's slot at the new struct array_cache */
        new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
        /* Hand the old one back so the caller can free it */
        new->new[smp_processor_id()] = old;
}

 

alloc_kmemlist

Initializes the shared local cache and the slab three-lists; right after initialization the lists contain no slabs.

static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
{
        int node;
        struct kmem_list3 *l3;
        struct array_cache *new_shared;
        struct array_cache **new_alien = NULL;

        for_each_online_node(node) {
                /* NUMA related: per-node alien caches */
                if (use_alien_caches) {
                        new_alien = alloc_alien_cache(node, cachep->limit, gfp);
                        if (!new_alien)
                                goto fail;
                }

                new_shared = NULL;
                if (cachep->shared) {
                        /* Allocate the shared local cache */
                        new_shared = alloc_arraycache(node,
                                cachep->shared*cachep->batchcount,
                                        0xbaadf00d, gfp);
                        if (!new_shared) {
                                free_alien_cache(new_alien);
                                goto fail;
                        }
                }
                /* Fetch the node's old slab three-lists */
                l3 = cachep->nodelists[node];
                if (l3) {
                        /* The pointer is not NULL: release the old resources
                         * first */
                        struct array_cache *shared = l3->shared;

                        spin_lock_irq(&l3->list_lock);
                        /* Flush the objects held in the old shared local
                         * cache */
                        if (shared)
                                free_block(cachep, shared->entry,
                                           shared->avail, node);
                        /* Install the new shared local cache */
                        l3->shared = new_shared;
                        if (!l3->alien) {
                                l3->alien = new_alien;
                                new_alien = NULL;
                        }
                        /* Upper bound on the free objects the cache keeps */
                        l3->free_limit = (1 + nr_cpus_node(node)) *
                                         cachep->batchcount + cachep->num;
                        spin_unlock_irq(&l3->list_lock);
                        /* Free the old shared local cache's struct
                         * array_cache */
                        kfree(shared);
                        /* NUMA related */
                        free_alien_cache(new_alien);
                        continue;
                }
                /* Allocate a new set of slab three-lists */
                l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
                if (!l3) {
                        free_alien_cache(new_alien);
                        kfree(new_shared);
                        goto fail;
                }
                /* Initialize the slab three-lists */
                kmem_list3_init(l3);
                l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
                                ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
                l3->shared = new_shared;
                l3->alien = new_alien;
                l3->free_limit = (1 + nr_cpus_node(node)) *
                                 cachep->batchcount + cachep->num;
                cachep->nodelists[node] = l3;
        }
        return 0;

fail:
        if (!cachep->next.next) {
                /* Cache is not active yet. Roll back what we did */
                node--;
                while (node >= 0) {
                        if (cachep->nodelists[node]) {
                                l3 = cachep->nodelists[node];

                                kfree(l3->shared);
                                free_alien_cache(l3->alien);
                                kfree(l3);
                                cachep->nodelists[node] = NULL;
                        }
                        node--;
                }
        }
        return -ENOMEM;
}

 

set_up_list3s

Points the cache's slab three-lists at statically allocated globals.

static void __init set_up_list3s(struct kmem_cache *cachep, int index)
{
        int node;
        /* On UMA there is only one node */
        for_each_online_node(node) {
                /* The global initkmem_list3 array provides the slab
                 * three-lists used during initialization */
                cachep->nodelists[node] = &initkmem_list3[index + node];
                /* Set the reap time */
                cachep->nodelists[node]->next_reap = jiffies +
                    REAPTIMEOUT_LIST3 +
                    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
        }
}

 
