Category: LINUX

2015-01-16 18:10:27

Initialization call path: start_kernel() -> mm_init() -> kmem_cache_init()
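For orientation, here is a minimal, abridged sketch of where kmem_cache_init() sits in the boot sequence (assuming a kernel around 2.6.32; the real mm_init() contains a few more calls and its contents vary between versions):

static void __init mm_init(void)
{
	/* The page (buddy) allocator must be usable before the slab bootstrap */
	mem_init();
	/* Bootstrap the slab allocator -- the function analysed below */
	kmem_cache_init();
	/* Later init steps may already rely on kmalloc() */
	vmalloc_init();
}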

Let's walk through the code in detail.


/*
 * Initialisation. Called after the page allocator have been initialised and
 * before smp_init().
 */
void __init kmem_cache_init(void)
{
    size_t left_over;
    struct cache_sizes *sizes;
    struct cache_names *names;
    int i;
    int order;
    int node;

    /* On non-NUMA platforms, set use_alien_caches to 0; cache_free_alien() is then never called */
    if (num_possible_nodes() == 1)
        use_alien_caches = 0;

    /* initkmem_list3 is a global array; slab is not yet initialised here, so kmalloc cannot be used */
    /* #define NODES_SHIFT     CONFIG_NODES_SHIFT */
    /* #define MAX_NUMNODES    (1 << NODES_SHIFT) */
    /* #define NUM_INIT_LISTS (3 * MAX_NUMNODES) */
    /* CONFIG_NODES_SHIFT is the maximum number of NUMA nodes the current configuration supports.
       For each memory node there are slab lists (full/free/partial) for struct kmem_cache,
       struct arraycache_init and struct kmem_list3, hence 3 * MAX_NUMNODES entries. */

    for (i = 0; i < NUM_INIT_LISTS; i++) {
        /* Initialise the lists, the lock and the other members; straightforward */
        kmem_list3_init(&initkmem_list3[i]);
        /* cache_cache is a global variable, the first cache (struct kmem_cache) in the kernel;
           walk all nodes and initialise its node lists to NULL */
        if (i < MAX_NUMNODES)
            cache_cache.nodelists[i] = NULL;
    }
    /* Point cache_cache's nodelists at the corresponding entries of initkmem_list3, one per NUMA node */
    /* CACHE_CACHE is the index of cache_cache in the kernel cache list; it is the first cache, so 0 */
    set_up_list3s(&cache_cache, CACHE_CACHE);

    /*
     * Fragmentation resistance on low memory - only use bigger
     * page orders on machines with more than 32MB of memory.
     */
    /* If memory is larger than 32MB, slab_break_gfp_order becomes 1 (each slab uses at most
       2 pages), otherwise 0. It caps the number of pages per slab to limit fragmentation. */
    /* The cap may be exceeded when an object is so large that not even one fits in a slab */
    if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
        slab_break_gfp_order = BREAK_GFP_ORDER_HI;

    /* Bootstrap is tricky, because several objects are allocated
     * from caches that do not exist yet:
     * 1) initialize the cache_cache cache: it contains the struct
     *    kmem_cache structures of all caches, except cache_cache itself:
     *    cache_cache is statically allocated.
     *    Initially an __init data area is used for the head array and the
     *    kmem_list3 structures, it's replaced with a kmalloc allocated
     *    array at the end of the bootstrap.
     * 2) Create the first kmalloc cache.
     *    The struct kmem_cache for the new cache is allocated normally.
     *    An __init data area is used for the head array.
     * 3) Create the remaining kmalloc caches, with minimally sized
     *    head arrays.
     * 4) Replace the __init data head arrays for cache_cache and the first
     *    kmalloc cache with kmalloc allocated arrays.
     * 5) Replace the __init data for kmem_list3 for cache_cache and
     *    the other cache's with kmalloc allocated memory.
     * 6) Resize the head arrays of the kmalloc caches to their final sizes.
     */
    /* Get the NUMA node ID corresponding to the current CPU */
    node = numa_node_id();

    /* 1) create the cache_cache */
    /* cache_chain is the head of the kernel's list of slab caches */
    INIT_LIST_HEAD(&cache_chain);
    /* cache_cache is the kernel's first slab cache; link it onto cache_chain */
    list_add(&cache_cache.next, &cache_chain);
    /* The colour offset of the cache is the cache line size */
    cache_cache.colour_off = cache_line_size();
    /* Point cache_cache's local cache at the statically allocated one */
    cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
    /* Point the slab lists of the current NUMA node at the static initkmem_list3 entries */
    /* Judging by the current code this duplicates set_up_list3s(), which already assigned
       the lists for all NUMA nodes, including this one */
    cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];

    /*
     * struct kmem_cache size depends on nr_node_ids, which
     * can be less than MAX_NUMNODES.
     */
    /* buffer_size holds the object size of the slab. As the comment above says, it must be
       based on nr_node_ids, so the object size is recomputed here. */
    cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
                 nr_node_ids * sizeof(struct kmem_list3 *);
#if DEBUG
    cache_cache.obj_size = cache_cache.buffer_size;
#endif
    /* Align the object size to the cache line size */
    cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
                    cache_line_size());
    /* Precompute the reciprocal of the object size, used to compute an object's index within a slab */
    cache_cache.reciprocal_buffer_size =
        reciprocal_value(cache_cache.buffer_size);

    for (order = 0; order < MAX_ORDER; order++) {
        /* Work out the maximum number of objects cache_cache can hold per slab */
        cache_estimate(order, cache_cache.buffer_size,
            cache_line_size(), 0, &left_over, &cache_cache.num);
        if (cache_cache.num)
            break;
    }
    BUG_ON(!cache_cache.num);
    /* Number of pages per slab: 2^gfporder */
    cache_cache.gfporder = order;
    /* Size of the slab colouring area, in units of colour_off */
    cache_cache.colour = left_over / cache_cache.colour_off;
    /* Size of the slab management area */
    cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
                 sizeof(struct slab), cache_line_size());

    /* 2+3) create the kmalloc caches */
    /* Create the general caches used by kmalloc. Their number and sizes depend on the system
       memory configuration and on PAGE_SIZE/L1_CACHE_BYTES/KMALLOC_MAX_SIZE, as defined in
       linux/kmalloc_sizes.h. Each size class has two caches, a DMA cache and a regular cache,
       stored in struct cache_sizes malloc_sizes[]. */
    sizes = malloc_sizes;
    names = cache_names;

    /*
     * Initialize the caches that provide memory for the array cache and the
     * kmem_list3 structures first. Without this, further allocations will
     * bug.
     */
    /* Create the general cache backing struct arraycache_init; later initialisation needs it */
    /* INDEX_AC is the kmalloc size-class index of the struct arraycache_init objects used for
       the local caches, i.e. which size class they fall into; the definition of INDEX_AC makes
       this clear */
    sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
                    sizes[INDEX_AC].cs_size,
                    ARCH_KMALLOC_MINALIGN,
                    ARCH_KMALLOC_FLAGS|SLAB_PANIC,
                    NULL);
    /* If struct kmem_list3 and struct arraycache_init map to different kmalloc size indexes,
       create a separate cache for kmem_list3; otherwise the two share one cache */
    if (INDEX_AC != INDEX_L3) {
        sizes[INDEX_L3].cs_cachep =
            kmem_cache_create(names[INDEX_L3].name,
                sizes[INDEX_L3].cs_size,
                ARCH_KMALLOC_MINALIGN,
                ARCH_KMALLOC_FLAGS|SLAB_PANIC,
                NULL);
    }
    /* Once these two general caches exist, the slab_early_init phase is over */
    slab_early_init = 0;
    /* Now loop over every kmalloc size class and create its caches; the size classes are
       defined in linux/kmalloc_sizes.h */
    while (sizes->cs_size != ULONG_MAX) {
        /*
         * For performance, all the general caches are L1 aligned.
         * This should be particularly beneficial on SMP boxes, as it
         * eliminates "false sharing".
         * Note for systems short on memory removing the alignment will
         * allow tighter packing of the smaller caches.
         */
        /* The kmalloc cache of this size has not been created yet, so create it here */
        if (!sizes->cs_cachep) {
            sizes->cs_cachep = kmem_cache_create(names->name,
                    sizes->cs_size,
                    ARCH_KMALLOC_MINALIGN,
                    ARCH_KMALLOC_FLAGS|SLAB_PANIC,
                    NULL);
        }
#ifdef CONFIG_ZONE_DMA
        /* Each kmalloc size class has both a regular cache and a DMA cache;
           create the DMA cache if DMA zones are supported */
        sizes->cs_dmacachep = kmem_cache_create(
                    names->name_dma,
                    sizes->cs_size,
                    ARCH_KMALLOC_MINALIGN,
                    ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
                        SLAB_PANIC,
                    NULL);
#endif
        /* Move on to the next size class */
        sizes++;
        names++;
    }
    /* 4) Replace the bootstrap head arrays */
    /* From here on, memory allocated with kmalloc replaces the static bootstrap variables */
    /* As the code shows, what gets replaced is initarray_cache.cache and initarray_generic.cache */
    {
        struct array_cache *ptr;
        /* Allocate space for cache_cache's local cache */
        ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
        /* Copy the original initarray_cache.cache to the new location */
        BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
        memcpy(ptr, cpu_cache_get(&cache_cache),
         sizeof(struct arraycache_init));
        /*
         * Do not assume that spinlocks can be initialized via memcpy:
         */
        spin_lock_init(&ptr->lock);
        /* Update the pointer so it refers to the dynamically allocated memory */
        cache_cache.array[smp_processor_id()] = ptr;
        /* Allocate space to replace initarray_generic.cache */
        ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);

        BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
         != &initarray_generic.cache);
        memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
         sizeof(struct arraycache_init));
        /*
         * Do not assume that spinlocks can be initialized via memcpy:
         */
        spin_lock_init(&ptr->lock);
        /* Update the pointer so it refers to the newly allocated memory */
        malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
         ptr;
    }
    /* 5) Replace the bootstrap kmem_list3's */
    /* Same as step 4: replace the statically allocated slab lists with dynamically allocated memory */
    {
        int nid;

        for_each_online_node(nid) {
            init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);

            init_list(malloc_sizes[INDEX_AC].cs_cachep,
                 &initkmem_list3[SIZE_AC + nid], nid);

            if (INDEX_AC != INDEX_L3) {
                init_list(malloc_sizes[INDEX_L3].cs_cachep,
                     &initkmem_list3[SIZE_L3 + nid], nid);
            }
        }
    }
    /* Record how far slab initialisation has progressed */
    g_cpucache_up = EARLY;
}
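The INDEX_AC and INDEX_L3 used above are the kmalloc size-class indexes of struct arraycache_init and struct kmem_list3, and SIZE_AC/SIZE_L3 are the offsets of their bootstrap lists inside initkmem_list3. A sketch of the related definitions from mm/slab.c of this kernel generation (index_of() maps an object size to its slot in malloc_sizes[]; treat the exact values as an assumption for your kernel version):

/* initkmem_list3 layout: cache_cache's lists first, then the lists for the
 * arraycache_init cache and the kmem_list3 cache, one group per NUMA node */
#define CACHE_CACHE 0
#define SIZE_AC MAX_NUMNODES
#define SIZE_L3 (2 * MAX_NUMNODES)

/* Which kmalloc size class each bootstrap structure falls into */
#define INDEX_AC index_of(sizeof(struct arraycache_init))
#define INDEX_L3 index_of(sizeof(struct kmem_list3))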
Next, let's look at the helper functions it calls.


static void kmem_list3_init(struct kmem_list3 *parent)
{
    /* List of fully used slabs */
    INIT_LIST_HEAD(&parent->slabs_full);
    /* List of partially free slabs */
    INIT_LIST_HEAD(&parent->slabs_partial);
    /* List of completely free slabs */
    INIT_LIST_HEAD(&parent->slabs_free);
    parent->shared = NULL;
    parent->alien = NULL;
    parent->colour_next = 0;
    spin_lock_init(&parent->list_lock);
    parent->free_objects = 0;
    parent->free_touched = 0;
}
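For reference, these are the fields of the per-node structure being initialised; a sketch of struct kmem_list3 as it appears in mm/slab.c of this generation (the field comments are mine and the exact layout may differ slightly in your version):

struct kmem_list3 {
    struct list_head slabs_partial; /* partially used slabs */
    struct list_head slabs_full;    /* fully used slabs */
    struct list_head slabs_free;    /* completely free slabs */
    unsigned long free_objects;     /* free objects on this node */
    unsigned int free_limit;        /* limit before free slabs are reclaimed */
    unsigned int colour_next;       /* per-node cache colouring cursor */
    spinlock_t list_lock;
    struct array_cache *shared;     /* per-node shared array cache */
    struct array_cache **alien;     /* objects that belong to other nodes */
    unsigned long next_reap;        /* earliest time the next reap may run */
    int free_touched;               /* set when the free lists were recently used */
};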


/*
 * For setting up all the kmem_list3s for cache whose buffer_size is same as
 * size of kmem_list3.
 */
/* Called as set_up_list3s(&cache_cache, CACHE_CACHE), where CACHE_CACHE is 0 */
/* Point the cache's nodelists at the statically allocated globals, i.e. all three
   slab lists use the static definitions */
static void __init set_up_list3s(struct kmem_cache *cachep, int index)
{
    int node;
    /* Walk the online NUMA nodes */
    for_each_online_node(node) {
        /* Point at the statically defined slab lists */
        cachep->nodelists[node] = &initkmem_list3[index + node];
        /* Set the reap deadline; next_reap is the minimum interval between two cache reaps */
        cachep->nodelists[node]->next_reap = jiffies +
         REAPTIMEOUT_LIST3 +
         ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
    }
}
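The ((unsigned long)cachep) % REAPTIMEOUT_LIST3 term staggers the reap deadlines of different caches so the periodic reaper does not have to process them all on the same tick. The timeout constants assumed here, as a sketch matching mainline slab.c of this period:

/* Minimum interval between reaps of the per-CPU arrays / the per-node lists */
#define REAPTIMEOUT_CPUC    (2*HZ)
#define REAPTIMEOUT_LIST3   (4*HZ)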


/*
 * Calculate the number of objects and left-over bytes for a given buffer size.
 */
/* gfporder: iterated from 0 while < MAX_ORDER until the cache's object count is non-zero;
             a slab consists of 2^gfporder pages
   buffer_size: the size of the cache's objects after cache_line_size alignment
   align: cache_line_size, the alignment to apply
   flags: 0 here; distinguishes on-slab from off-slab management
   left_over: output, the wasted space left in the slab
   num: output, the number of objects the slab can hold
 */
static void cache_estimate(unsigned long gfporder, size_t buffer_size,
             size_t align, int flags, size_t *left_over,
             unsigned int *num)
{
    int nr_objs;
    size_t mgmt_size;
    /* PAGE_SIZE is one page; slab_size is the total size in bytes of the slab's pages */
    size_t slab_size = PAGE_SIZE << gfporder;

    /*
     * The slab management structure can be either off the slab or
     * on it. For the latter case, the memory allocated for a
     * slab is used for:
     *
     * - The struct slab
     * - One kmem_bufctl_t for each object
     * - Padding to respect alignment of @align
     * - @buffer_size bytes for each object
     *
     * If the slab management structure is off the slab, then the
     * alignment will already be calculated into the size. Because
     * the slabs are all pages aligned, the objects will be at the
     * correct alignment when allocated.
     */
    /* Off-slab management */
    if (flags & CFLGS_OFF_SLAB) {
        mgmt_size = 0;
        /* The slab contains no management data; it holds only objects,
           so compute the object count directly */
        nr_objs = slab_size / buffer_size;
        /* Cap the count at the limit */
        if (nr_objs > SLAB_LIMIT)
            nr_objs = SLAB_LIMIT;
    } else {
        /*
         * Ignore padding for the initial guess. The padding
         * is at most @align-1 bytes, and @buffer_size is at
         * least @align. In the worst case, this result will
         * be one greater than the number of objects that fit
         * into the memory allocation when taking the padding
         * into account.
         */
        /* On-slab management: the management data lives together with the objects.
           The slab pages then hold the struct slab, the kmem_bufctl_t array and the objects;
           the kmem_bufctl_t array has one entry per object. */
        nr_objs = (slab_size - sizeof(struct slab)) /
              (buffer_size + sizeof(kmem_bufctl_t));

        /*
         * This calculated number will be either the right
         * amount, or one greater than what we want.
         */
        /* Recompute with cache-line alignment; if the total exceeds the slab size, drop one object */
        if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size > slab_size)
            nr_objs--;
        /* Cap the count at the limit */
        if (nr_objs > SLAB_LIMIT)
            nr_objs = SLAB_LIMIT;
        /* Size of the management data after cache-line alignment */
        mgmt_size = slab_mgmt_size(nr_objs, align);
    }
    /* Return the computed object count via num */
    *num = nr_objs;
    /* Return the wasted space in this slab */
    *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
}
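slab_mgmt_size(), used above, simply rounds the on-slab management data (the struct slab header plus one kmem_bufctl_t per object) up to the requested alignment; a sketch consistent with mainline slab.c:

static size_t slab_mgmt_size(size_t nr_objs, size_t align)
{
    return ALIGN(sizeof(struct slab) + nr_objs * sizeof(kmem_bufctl_t), align);
}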
The code above uses the two arrays cache_names and malloc_sizes, which hold the names and the corresponding sizes of the general caches, matched one to one. Their definitions are simple enough to read directly.
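A sketch of how those two arrays are generated from linux/kmalloc_sizes.h in the 2.6.x sources, where the header expands a CACHE(x) macro once per size class (exact annotations may vary by version):

/* One entry per kmalloc size class, terminated by ULONG_MAX */
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
    CACHE(ULONG_MAX)
#undef CACHE
};

/* Matching cache names, e.g. "size-32" and "size-32(DMA)" */
static struct cache_names cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
    {NULL,}
#undef CACHE
};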
