Category: LINUX

2016-11-19 14:14:02

1. Overview

1.1 The kmem_cache_s structure, defined in mm/slab.c
struct kmem_cache_s {
/* 1) each alloc & free */
    /* full, partial first, then free */
    struct list_head    slabs_full;
    struct list_head    slabs_partial;
    struct list_head    slabs_free;
    unsigned int        objsize;
    unsigned int        flags;    /* constant flags */
    unsigned int        num;      /* # of objs per slab */
    spinlock_t          spinlock;
#ifdef CONFIG_SMP
    unsigned int        batchcount;
#endif

/* 2) slab additions /removals */
    /* order of pgs per slab (2^n) */
    unsigned int        gfporder;

    /* force GFP flags, e.g. GFP_DMA */
    unsigned int        gfpflags;

    size_t              colour;         /* cache colouring range */
    unsigned int        colour_off;     /* colour offset */
    unsigned int        colour_next;    /* cache colouring */
    kmem_cache_t        *slabp_cache;
    unsigned int        growing;
    unsigned int        dflags;         /* dynamic flags */

    /* constructor func */
    void (*ctor)(void *, kmem_cache_t *, unsigned long);

    /* de-constructor func */
    void (*dtor)(void *, kmem_cache_t *, unsigned long);

    unsigned long       failures;

/* 3) cache creation/removal */
    char                name[CACHE_NAMELEN];
    struct list_head    next;
#ifdef CONFIG_SMP
/* 4) per-cpu data */
    cpucache_t          *cpudata[NR_CPUS];
#endif
#if STATS
    unsigned long       num_active;
    unsigned long       num_allocations;
    unsigned long       high_mark;
    unsigned long       grown;
    unsigned long       reaped;
    unsigned long       errors;
#ifdef CONFIG_SMP
    atomic_t            allochit;
    atomic_t            allocmiss;
    atomic_t            freehit;
    atomic_t            freemiss;
#endif
#endif
};

The slab descriptor slab_t is also defined in mm/slab.c:

typedef struct slab_s {
    struct list_head    list;
    unsigned long       colouroff;
    void                *s_mem;     /* including colour offset */
    unsigned int        inuse;      /* num of objs active in slab */
    kmem_bufctl_t       free;       /* note: free is the index of the first free object, not a count;
                                     * the chain ends when free == BUFCTL_END */
} slab_t;
The following defines relate to free:

#define BUFCTL_END 0xffffFFFF     /* the maximum value marks the end of the chain */
#define SLAB_LIMIT 0xffffFFFE     /* the largest valid object index; the end marker is that plus one */

2. Code analysis
The DEBUG values quoted in the comments below come from a kernel built with:
Kernel hacking  --->
  [*]   Debug memory allocations  -->  CONFIG_DEBUG_SLAB

The first cache, cache_cache, is defined statically in mm/slab.c:

static kmem_cache_t cache_cache = {
    slabs_full:       LIST_HEAD_INIT(cache_cache.slabs_full),
    slabs_partial:    LIST_HEAD_INIT(cache_cache.slabs_partial),
    slabs_free:       LIST_HEAD_INIT(cache_cache.slabs_free),
    objsize:          sizeof(kmem_cache_t),
    flags:            SLAB_NO_REAP,
    spinlock:         SPIN_LOCK_UNLOCKED,
    colour_off:       L1_CACHE_BYTES,
    name:             "kmem_cache",
};
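As an aside, the slabs_full: LIST_HEAD_INIT(...) form above is the old GNU C labelled-initializer syntax; C99 and later spell the same static initialization with designated initializers. A minimal user-space sketch (the struct and names here are made up for illustration, not kernel code):

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };
#define LIST_HEAD_INIT(name) { &(name), &(name) }

/* stand-in for kmem_cache_t, just to show the initializer style */
struct my_cache {
    struct list_head slabs_full;
    unsigned int     objsize;
};

/* C99 designated initializers: ".field =" instead of the GNU "field:" */
static struct my_cache my_cache_cache = {
    .slabs_full = LIST_HEAD_INIT(my_cache_cache.slabs_full),
    .objsize    = sizeof(struct my_cache),
};

int main(void)
{
    printf("objsize = %u\n", my_cache_cache.objsize);
    return 0;
}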
2.1 kmem_cache_init (mm/slab.c, around line 415)
Call path: start_kernel --> kmem_cache_init

void __init kmem_cache_init(void)
{
    size_t left_over;

    init_MUTEX(&cache_chain_sem);
    /* #define cache_chain (cache_cache.next); initialize the cache chain list head */
    INIT_LIST_HEAD(&cache_chain);
    /* compute how many objects fit per slab and how many bytes are left over */
    kmem_cache_estimate(0, cache_cache.objsize, 0, &left_over, &cache_cache.num);
    if (!cache_cache.num)
        BUG();
    cache_cache.colour = left_over/cache_cache.colour_off;    /* number of distinct colour offsets (left_over / L1 line size); colour = 0 here */
    cache_cache.colour_next = 0;                              /* next colour to use, starting at 0 */
}
After execution (without CONFIG_DEBUG_SLAB): &cache_cache = 0xc02cf7c0, sizeof(cache_cache) = 248
$1 = {slabs_full = {next = 0xc02cf7c0 <cache_cache>, prev = 0xc02cf7c0 <cache_cache>}, slabs_partial = {next = 0xc02cf7c8 <cache_cache+8>, prev = 0xc02cf7c8 <cache_cache+8>},
  slabs_free = {next = 0xc02cf7d0 <cache_cache+16>, prev = 0xc02cf7d0 <cache_cache+16>}, objsize = 248, flags = 4096, num = 16, spinlock = {lock = 1, magic = 3735899821},
  batchcount = 0, gfporder = 0, gfpflags = 0, colour = 0, colour_off = 128, colour_next = 0, slabp_cache = 0x0, growing = 0, dflags = 0, ctor = 0x0, dtor = 0x0, failures = 0,
  name = "kmem_cache\000\000\000\000\000\000\000\000\000", next = {next = 0xc02cf830 <cache_cache+112>, prev = 0xc02cf830 <cache_cache+112>}, cpudata = {0x0 <repeats 32 times>}}
2.1.1 kmem_cache_estimate (mm/slab.c, around line 387); call path: kmem_cache_init --> kmem_cache_estimate
Arguments: gfporder=0, size=sizeof(kmem_cache_t)=0xF8=248, flags=0x0 (with CONFIG_DEBUG_SLAB, size=288)
static void kmem_cache_estimate (unsigned long gfporder, size_t size, int flags, size_t *left_over, unsigned int *num)
{
    int i;
    size_t wastage = PAGE_SIZE<<gfporder;      /* gfporder=0 here, so wastage = 4096 = 0x1000 */
    size_t extra = 0;
    size_t base = 0;

    if (!(flags & CFLGS_OFF_SLAB)) {           /* CFLGS_OFF_SLAB means the slab descriptor lives outside the slab */
        base = sizeof(slab_t);                 /* base = 24 */
        extra = sizeof(kmem_bufctl_t);         /* extra = 4 */
    }
    i = 0;
    while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)  /* wastage=4096, size=248 */
        i++;                                                  /* loop exits with i=17 */
    if (i > 0)
        i--;                                                  /* i=16 afterwards */

    if (i > SLAB_LIMIT)                                       /* SLAB_LIMIT=0xffffFFFE */
        i = SLAB_LIMIT;

    *num = i;
    wastage -= i*size;                         /* i=16, size=248, i*size=3968; wastage=128 afterwards (352 with DEBUG) */
    wastage -= L1_CACHE_ALIGN(base+i*extra);   /* wastage=0 afterwards (224 with DEBUG) */
    *left_over = wastage;                      /* *left_over=0 (224 with DEBUG) */
}
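To double-check the numbers in the comments, the estimate loop can be reproduced in user space. A minimal sketch, assuming PAGE_SIZE=4096, L1_CACHE_BYTES=128, sizeof(slab_t)=24 and sizeof(kmem_bufctl_t)=4 as stated above (this is an illustration, not the kernel code):

#include <stdio.h>

#define PAGE_SIZE       4096
#define L1_CACHE_BYTES  128
#define L1_CACHE_ALIGN(x) (((x) + L1_CACHE_BYTES - 1) & ~(L1_CACHE_BYTES - 1))

/* user-space re-implementation of the kmem_cache_estimate() loop above */
static void estimate(unsigned long gfporder, size_t size, int off_slab,
                     size_t *left_over, unsigned int *num)
{
    size_t wastage = PAGE_SIZE << gfporder;
    size_t base  = off_slab ? 0 : 24;   /* sizeof(slab_t) when management is on-slab */
    size_t extra = off_slab ? 0 : 4;    /* sizeof(kmem_bufctl_t) */
    unsigned int i = 0;

    while (i * size + L1_CACHE_ALIGN(base + i * extra) <= wastage)
        i++;
    if (i > 0)
        i--;

    *num = i;
    *left_over = wastage - i * size - L1_CACHE_ALIGN(base + i * extra);
}

int main(void)
{
    size_t left; unsigned int num;

    estimate(0, 248, 0, &left, &num);   /* cache_cache: objsize = 248 */
    printf("size=248: num=%u left_over=%zu\n", num, left);   /* 16, 0 */

    estimate(0, 64, 0, &left, &num);    /* size-32 cache after SLAB_HWCACHE_ALIGN padding */
    printf("size=64:  num=%u left_over=%zu\n", num, left);   /* 58, 128 */
    return 0;
}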


2.2 kmem_cache_sizes_init, called from start_kernel after mem_init
/* Initialisation - setup remaining internal and general caches.
 * Called after the gfp() functions have been enabled, and before smp_init().
 */
void __init kmem_cache_sizes_init(void)
{
    cache_sizes_t *sizes = cache_sizes;    /* cache_sizes ranges from 32 bytes up to 131072 = 128KB = 32 pages */
    char name[20];

    if (num_physpages > (32 << 20) >> PAGE_SHIFT)         /* num_physpages=0x3fffe, i.e. more page frames than 32MB worth */
        slab_break_gfp_order = BREAK_GFP_ORDER_HI;        /* slab_break_gfp_order = 2 afterwards */

    /* cs_size = {32,64,128,256,512,1024,2048,4096,8192,16384,32768,65536,131072} */
    do {
        sprintf(name,"size-%Zd",sizes->cs_size);
        /* sizes->cs_cachep = 0xc210b080 afterwards */
        if (!(sizes->cs_cachep = kmem_cache_create(name, sizes->cs_size, 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
            BUG();
        }

        /* Inc off-slab bufctl limit until the ceiling is hit. */
        if (!(OFF_SLAB(sizes->cs_cachep))) {
            offslab_limit = sizes->cs_size-sizeof(slab_t);   /* 32 - 24 = 8 */
            offslab_limit /= 2;                              /* offslab_limit = 4 afterwards */
        }
        sprintf(name, "size-%Zd(DMA)",sizes->cs_size);       /* now create the size-32(DMA) cache the same way */
        sizes->cs_dmacachep = kmem_cache_create(name, sizes->cs_size, 0, SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!sizes->cs_dmacachep)
            BUG();
        sizes++;
    } while (sizes->cs_size);
}
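For reference, cache_sizes is a table of {size, normal cache, DMA cache} entries terminated by a zero size; its rough shape is sketched below (field names match the code above, but treat the exact declaration as an assumption rather than a verbatim quote of the 2.4 source):

/* Rough shape of the general-cache size table (2.4-era, layout assumed) */
typedef struct cache_sizes {
    size_t        cs_size;       /* object size of this general cache     */
    kmem_cache_t *cs_cachep;     /* "size-N" cache, normal memory         */
    kmem_cache_t *cs_dmacachep;  /* "size-N(DMA)" cache, GFP_DMA memory   */
} cache_sizes_t;

static cache_sizes_t cache_sizes[] = {
    {     32, NULL, NULL },
    {     64, NULL, NULL },
    /* ... 128, 256, ..., 65536 ... */
    { 131072, NULL, NULL },
    {      0, NULL, NULL }       /* zero size terminates the do/while loop */
};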

kmem_cache_create (mm/slab.c, around line 592), called from kmem_cache_sizes_init
Arguments for the first call: name="size-32", size=0x20, offset=0x0, flags=SLAB_HWCACHE_ALIGN=0x2000, ctor=NULL, dtor=NULL

kmem_cache_t * kmem_cache_create (const char *name, size_t size, size_t offset,
    unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
    void (*dtor)(void*, kmem_cache_t *, unsigned long))
{
    const char *func_nm = KERN_ERR "kmem_create: ";
    size_t left_over, align, slab_size;
    kmem_cache_t *cachep = NULL;

    ...    /* some sanity checks omitted */

    /* Get cache's description obj. */
    cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);  /* cachep = 0xc210b080 afterwards */
    if (!cachep)
        goto opps;
    memset(cachep, 0, sizeof(kmem_cache_t));

    /* Check that size is in terms of words. This is needed to avoid
     * unaligned accesses for some archs when redzoning is used, and makes
     * sure any on-slab bufctl's are also correctly aligned.
     */
    if (size & (BYTES_PER_WORD-1)) {
        size += (BYTES_PER_WORD-1);
        size &= ~(BYTES_PER_WORD-1);
        printk("%sForcing size word alignment - %s\n", func_nm, name);
    }

    align = BYTES_PER_WORD;                       /* align = 4 */
    if (flags & SLAB_HWCACHE_ALIGN)
        align = L1_CACHE_BYTES;                   /* align = 128 = 0x80 */

    /* Determine if the slab management is 'on' or 'off' slab. */
    if (size >= (PAGE_SIZE>>3))                   /* size=32 here, so management stays on-slab */
        flags |= CFLGS_OFF_SLAB;

    if (flags & SLAB_HWCACHE_ALIGN) {
        while (size < align/2)
            align /= 2;                           /* align: 128 -> 64 */
        size = (size+align-1)&(~(align-1));       /* size: 32 -> 64 */
        /* Not entirely clear to me why 32 is padded up to 64 here: why not pack four
         * objects per cache line instead of only two? */
    }
    /* with DEBUG_SLAB enabled: flags=0xC00, size=0x28=40 -- not considered further here */
    /* without DEBUG_SLAB: flags=0x200, size=64, align=64 */
    do {
        unsigned int break_flag = 0;
cal_wastage:
        /* compute how many objects fit into 2^gfporder pages (one page here) */
        kmem_cache_estimate(cachep->gfporder, size, flags, &left_over, &cachep->num);  /* left_over=128, cachep->num=58 afterwards */
        if (break_flag)
            break;
        if (cachep->gfporder >= MAX_GFP_ORDER)
            break;
        if (!cachep->num)
            goto next;
        if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
            /* Oops, this num of objs will cause problems. */
            cachep->gfporder--;
            break_flag++;
            goto cal_wastage;
        }
        if (cachep->gfporder >= slab_break_gfp_order)
            break;

        if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))       /* internal fragmentation below 1/8 of the slab is acceptable (left_over=112 in the DEBUG run) */
            break;    /* Acceptable internal fragmentation. */
next:
        cachep->gfporder++;
    } while (1);

    if (!cachep->num) {
        printk("kmem_cache_create: couldn't create cache %s.\n", name);
        kmem_cache_free(&cache_cache, cachep);
        cachep = NULL;
        goto opps;
    }
    slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t));   /* slab_size = 256 */

    if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
        flags &= ~CFLGS_OFF_SLAB;
        left_over -= slab_size;
    }

    /* Offset must be a multiple of the alignment. */
    offset += (align-1);                          /* offset = 63 (align = 64) */
    offset &= ~(align-1);                         /* offset = 0 */
    if (!offset)
        offset = L1_CACHE_BYTES;                  /* offset = 128 = 0x80 */
    cachep->colour_off = offset;
    cachep->colour = left_over/offset;            /* left_over=128, offset=128, so cachep->colour = 1 */

    /* init remaining fields */
    if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
        flags |= CFLGS_OPTIMIZE;

    cachep->flags = flags;
    cachep->gfpflags = 0;
    if (flags & SLAB_CACHE_DMA)
        cachep->gfpflags |= GFP_DMA;
    spin_lock_init(&cachep->spinlock);
    cachep->objsize = size;
    INIT_LIST_HEAD(&cachep->slabs_full);
    INIT_LIST_HEAD(&cachep->slabs_partial);
    INIT_LIST_HEAD(&cachep->slabs_free);

    if (flags & CFLGS_OFF_SLAB)
        cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
    cachep->ctor = ctor;
    cachep->dtor = dtor;
    /* Copy name over so we don't have problems with unloaded modules */
    strcpy(cachep->name, name);

#ifdef CONFIG_SMP
    if (g_cpucache_up)
        enable_cpucache(cachep);
#endif
    /* Need the semaphore to access the chain. */
    down(&cache_chain_sem);
    {
        struct list_head *p;

        list_for_each(p, &cache_chain) {
            kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);

            /* The name field is constant - no lock needed. */
            if (!strcmp(pc->name, name))
                BUG();
        }
    }

    /* There is no reason to lock our new cache before we
     * link it in - no one knows about it yet...
     */
    list_add(&cachep->next, &cache_chain);
    up(&cache_chain_sem);
opps:
    return cachep;
}
After execution, the first few general caches (size-32, size-64, size-128 and their DMA variants) look like this:

(gdb) p *sizes->cs_cachep   //0xc210b080
$3 = {slabs_full = {next = 0xc210b080, prev = 0xc210b080}, slabs_partial = {next = 0xc210b088, prev = 0xc210b088}, slabs_free = {next = 0xc210b090, prev = 0xc210b090},
  objsize = 40, flags = 134144, num = 90, spinlock = {lock = 1, magic = 0xdead4ead}, batchcount = 0, gfporder = 0, gfpflags = 0, colour = 0, colour_off = 128, colour_next = 0,
  slabp_cache = 0x0, growing = 0, dflags = 0, ctor = 0x0, dtor = 0x0, failures = 0, name = "size-32", '\000' <repeats 12 times>, next = {next = 0xc02d0630 <cache_cache+112>,
    prev = 0xc210b210}, cpudata = {0x0 <repeats 32 times>}, num_active = 0, num_allocations = 0, high_mark = 0, grown = 0, reaped = 0, errors = 0, allochit = {counter = 0},
  allocmiss = {counter = 0}, freehit = {counter = 0}, freemiss = {counter = 0}}

(gdb) p *sizes->cs_dmacachep  //0xc210b1a0
$4 = {slabs_full = {next = 0xc210b1a0, prev = 0xc210b1a0}, slabs_partial = {next = 0xc210b1a8, prev = 0xc210b1a8}, slabs_free = {next = 0xc210b1b0, prev = 0xc210b1b0},
  objsize = 40, flags = 150528, num = 90, spinlock = {lock = 1, magic = 0xdead4ead}, batchcount = 0, gfporder = 0, gfpflags = 1, colour = 0, colour_off = 128, colour_next = 0,
  slabp_cache = 0x0, growing = 0, dflags = 0, ctor = 0x0, dtor = 0x0, failures = 0, name = "size-32(DMA)\000\000\000\000\000\000\000", next = {next = 0xc210b0f0,
    prev = 0xc02d0630 <cache_cache+112>}, cpudata = {0x0 <repeats 32 times>}, num_active = 0, num_allocations = 0, high_mark = 0, grown = 0, reaped = 0, errors = 0, allochit = {
    counter = 0}, allocmiss = {counter = 0}, freehit = {counter = 0}, freemiss = {counter = 0}}

(gdb) p *sizes->cs_cachep   //0xc210b2c0
$6 = {slabs_full = {next = 0xc210b2c0, prev = 0xc210b2c0}, slabs_partial = {next = 0xc210b2c8, prev = 0xc210b2c8}, slabs_free = {next = 0xc210b2d0, prev = 0xc210b2d0},
  objsize = 72, flags = 134144, num = 53, spinlock = {lock = 1, magic = 0xdead4ead}, batchcount = 0, gfporder = 0, gfpflags = 0, colour = 0, colour_off = 128, colour_next = 0,
  slabp_cache = 0x0, growing = 0, dflags = 0, ctor = 0x0, dtor = 0x0, failures = 0, name = "size-64", '\000' <repeats 12 times>, next = {next = 0xc210b210, prev = 0xc210b450},
  cpudata = {0x0 <repeats 32 times>}, num_active = 0, num_allocations = 0, high_mark = 0, grown = 0, reaped = 0, errors = 0, allochit = {counter = 0}, allocmiss = {counter = 0},
  freehit = {counter = 0}, freemiss = {counter = 0}}

(gdb) p *sizes->cs_dmacachep //0xc210b3e0
$8 = {slabs_full = {next = 0xc210b3e0, prev = 0xc210b3e0}, slabs_partial = {next = 0xc210b3e8, prev = 0xc210b3e8}, slabs_free = {next = 0xc210b3f0, prev = 0xc210b3f0},
  objsize = 72, flags = 150528, num = 53, spinlock = {lock = 1, magic = 0xdead4ead}, batchcount = 0, gfporder = 0, gfpflags = 1, colour = 0, colour_off = 128, colour_next = 0,
  slabp_cache = 0x0, growing = 0, dflags = 0, ctor = 0x0, dtor = 0x0, failures = 0, name = "size-64(DMA)\000\000\000\000\000\000\000", next = {next = 0xc210b330,
    prev = 0xc02d0630 <cache_cache+112>}, cpudata = {0x0 <repeats 32 times>}, num_active = 0, num_allocations = 0, high_mark = 0, grown = 0, reaped = 0, errors = 0, allochit = {
    counter = 0}, allocmiss = {counter = 0}, freehit = {counter = 0}, freemiss = {counter = 0}}

(gdb) p *sizes->cs_cachep   //0xc210b500
$11 = {slabs_full = {next = 0xc210b500, prev = 0xc210b500}, slabs_partial = {next = 0xc210b508, prev = 0xc210b508}, slabs_free = {next = 0xc210b510, prev = 0xc210b510},
  objsize = 136, flags = 134144, num = 28, spinlock = {lock = 1, magic = 3735899821}, batchcount = 0, gfporder = 0, gfpflags = 0, colour = 0, colour_off = 128, colour_next = 0,
  slabp_cache = 0x0, growing = 0, dflags = 0, ctor = 0x0, dtor = 0x0, failures = 0, name = "size-128", '\000' <repeats 11 times>, next = {next = 0xc210b450, prev = 0xc210b690},
  cpudata = {0x0 <repeats 32 times>}, num_active = 0, num_allocations = 0, high_mark = 0, grown = 0, reaped = 0, errors = 0, allochit = {counter = 0}, allocmiss = {counter = 0},
  freehit = {counter = 0}, freemiss = {counter = 0}}

(gdb) p *sizes->cs_dmacachep  //0xc210b620
$13 = {slabs_full = {next = 0xc210b620, prev = 0xc210b620}, slabs_partial = {next = 0xc210b628, prev = 0xc210b628}, slabs_free = {next = 0xc210b630, prev = 0xc210b630},
  objsize = 136, flags = 150528, num = 28, spinlock = {lock = 1, magic = 3735899821}, batchcount = 0, gfporder = 0, gfpflags = 1, colour = 0, colour_off = 128, colour_next = 0,
  slabp_cache = 0x0, growing = 0, dflags = 0, ctor = 0x0, dtor = 0x0, failures = 0, name = "size-128(DMA)\000\000\000\000\000\000", next = {next = 0xc210b570,
    prev = 0xc02d0630 <cache_cache+112>}, cpudata = {0x0 <repeats 32 times>}, num_active = 0, num_allocations = 0, high_mark = 0, grown = 0, reaped = 0, errors = 0, allochit = {
    counter = 0}, allocmiss = {counter = 0}, freehit = {counter = 0}, freemiss = {counter = 0}}
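Putting the pieces together, a dedicated cache would be used from 2.4-era kernel code roughly as in the sketch below. The prototypes match those quoted above; struct foo and the function names are invented for illustration, so treat this as a sketch rather than a verbatim kernel example:

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/errno.h>

struct foo {                       /* hypothetical object type */
    int              state;
    struct list_head list;
};

static kmem_cache_t *foo_cachep;

static int foo_cache_setup(void)
{
    /* name, object size, offset, flags, ctor, dtor -- same signature as quoted above */
    foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
                                   0, SLAB_HWCACHE_ALIGN, NULL, NULL);
    if (!foo_cachep)
        return -ENOMEM;
    return 0;
}

static struct foo *foo_alloc(void)
{
    /* takes an object from slabs_partial/slabs_free, growing the cache if necessary */
    return kmem_cache_alloc(foo_cachep, SLAB_KERNEL);
}

static void foo_free(struct foo *f)
{
    kmem_cache_free(foo_cachep, f);
}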


3. The kmem_cache_alloc path
Call site: cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
flags = GFP_KERNEL = 0x1F0
#define GFP_KERNEL    (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
                           0x20          0x10      0x40          0x80        0x100
3.1 kmem_cache_alloc, in mm/slab.c
Call path: kmem_cache_sizes_init --> kmem_cache_create --> kmem_cache_alloc
void * kmem_cache_alloc (kmem_cache_t *cachep, int flags)
{
    return __kmem_cache_alloc(cachep, flags);
}
It is just a thin wrapper around __kmem_cache_alloc:
static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
{
    unsigned long save_flags;
    void* objp;
    /* check that the flags are valid */
    kmem_cache_alloc_head(cachep, flags);    /* see 3.2 */
try_again:
    local_irq_save(save_flags);
#ifdef CONFIG_SMP
    {
        cpucache_t *cc = cc_data(cachep);    /* cc = 0x0 here */

        if (cc) {
            if (cc->avail) {
                STATS_INC_ALLOCHIT(cachep);
                objp = cc_entry(cc)[--cc->avail];
            } else {
                STATS_INC_ALLOCMISS(cachep);
                objp = kmem_cache_alloc_batch(cachep,cc,flags);
                if (!objp)
                    goto alloc_new_slab_nolock;
            }
        } else {
            spin_lock(&cachep->spinlock);         /* SMP needs the per-cache lock here */
            objp = kmem_cache_alloc_one(cachep);  /* see 3.3; jumps to alloc_new_slab if slabs_partial and slabs_free are both empty */
            spin_unlock(&cachep->spinlock);       /* after kmem_cache_grow runs, slabs_free has a slab again */
        }
    }
#else
    objp = kmem_cache_alloc_one(cachep);
#endif
    local_irq_restore(save_flags);
    return objp;
alloc_new_slab:                      /* kmem_cache_alloc_one is a macro; it jumps here when slabs_free is empty too */
#ifdef CONFIG_SMP
    spin_unlock(&cachep->spinlock);
alloc_new_slab_nolock:
#endif
    local_irq_restore(save_flags);
    if (kmem_cache_grow(cachep, flags))   /* see 3.4 */
        goto try_again;                   /* after growing the cache, jump back to try_again above */
    return NULL;
}
3.2 kmem_cache_alloc_head: only checks that the flags are valid
kmem_cache_alloc_head(cachep, flags);
static inline void kmem_cache_alloc_head(kmem_cache_t *cachep, int flags)
{
    if (flags & SLAB_DMA) {                /* flags=GFP_KERNEL=0x1F0, SLAB_DMA=0x01 */
        if (!(cachep->gfpflags & GFP_DMA))
            BUG();
    } else {
        if (cachep->gfpflags & GFP_DMA)    /* cachep->gfpflags=0x0 */
            BUG();
    }
}

3.3 kmem_cache_alloc_one
This macro runs twice in our trace: the first time slabs_free is empty, so it jumps to alloc_new_slab;
the second time slabs_free is no longer empty and a kmem_cache_t object can be handed out.
#define kmem_cache_alloc_one(cachep)                                                        \
({                                                                                          \
    struct list_head * slabs_partial, * entry;                                              \
    slab_t *slabp;                                                                          \
                                                                                            \
    slabs_partial = &(cachep)->slabs_partial;  /* head of the slabs_partial list */         \
    entry = slabs_partial->next;               /* for an empty list this points back at the head */ \
    if (unlikely(entry == slabs_partial)) {    /* slabs_partial is empty here */            \
        struct list_head * slabs_free;                                                      \
        slabs_free = &(cachep)->slabs_free;    /* head of the slabs_free list */            \
        entry = slabs_free->next;              /* again points back at the head if empty */ \
        if (unlikely(entry == slabs_free))     /* slabs_free is empty too... */             \
            goto alloc_new_slab;               /* ...so jump to alloc_new_slab */           \
        list_del(entry);                                                                    \
        list_add(entry, slabs_partial);                                                     \
    }                                                                                       \
                                                                                            \
    slabp = list_entry(entry, slab_t, list);                                                \
    kmem_cache_alloc_one_tail(cachep, slabp);  /* see 3.3.1 */                              \
})
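Two GCC extensions make this macro work: the ({ ... }) statement expression lets the macro body contain declarations and still yield a value (the result of kmem_cache_alloc_one_tail), and the goto alloc_new_slab jumps to the label in __kmem_cache_alloc where the macro is expanded. A tiny user-space illustration of the statement-expression part only (not kernel code):

#include <stdio.h>

/* GCC statement expression: the last expression is the value of the whole ({ ... }) */
#define max_of(a, b) ({     \
    int _a = (a), _b = (b); \
    _a > _b ? _a : _b;      \
})

int main(void)
{
    printf("%d\n", max_of(3, 7));   /* prints 7 */
    return 0;
}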

3.4 kmem_cache_grow
static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
{
    slab_t       *slabp;
    struct page  *page;
    void         *objp;
    size_t        offset;
    unsigned int  i, local_flags;
    unsigned long ctor_flags;
    unsigned long save_flags;

    ...  /* some sanity checks omitted */

    ctor_flags = SLAB_CTOR_CONSTRUCTOR;        /* ctor_flags = 0x1 */
    local_flags = (flags & SLAB_LEVEL_MASK);   /* local_flags = 0x1f0 */
    if (local_flags == SLAB_ATOMIC)
        ctor_flags |= SLAB_CTOR_ATOMIC;

    /* About to mess with non-constant members - lock. */
    spin_lock_irqsave(&cachep->spinlock, save_flags);

    /* Get colour for the slab, and cal the next value. */
    offset = cachep->colour_next;              /* colour_next=0 here, so offset=0 */
    cachep->colour_next++;                     /* colour_next: 0 -> 1 */
    if (cachep->colour_next >= cachep->colour) /* colour_next=1, colour=1 here */
        cachep->colour_next = 0;               /* so colour_next wraps back to 0 */
    offset *= cachep->colour_off;              /* offset was 0, so it stays 0 */
    cachep->dflags |= DFLGS_GROWN;             /* dflags = 0x01 afterwards */

    cachep->growing++;                         /* growing: 0 -> 1 */
    spin_unlock_irqrestore(&cachep->spinlock, save_flags);

    /* Get mem for the objs. */
    if (!(objp = kmem_getpages(cachep, flags)))   /* allocate one page from ZONE_NORMAL: 0xc210b000 */
        goto failed;

    /* offset=0x0, local_flags=0x1f0 */
    if (!(slabp = kmem_cache_slabmgmt(cachep, objp, offset, local_flags))) /* see 3.4.1; slabp points to 0xc210b000 afterwards */
        goto opps1;

    /* I hope this is OK. */
    i = 1 << cachep->gfporder;                    /* gfporder=0, so i=1 */
    page = virt_to_page(objp);                    /* convert the virtual address objp to its struct page in mem_map */
    do {
        SET_PAGE_CACHE(page, cachep);             /* page->list.next = cachep */
        SET_PAGE_SLAB(page, slabp);               /* page->list.prev = slabp */
        PageSetSlab(page);                        /* set PG_slab in page->flags */
        page++;
    } while (--i);

    kmem_cache_init_objs(cachep, slabp, ctor_flags);    /* see 3.4.2 */

    spin_lock_irqsave(&cachep->spinlock, save_flags);
    cachep->growing--;                            /* growing: 1 -> 0 */

    /* Make slab active. */
    list_add_tail(&slabp->list, &cachep->slabs_free);  /* put the new slab on slabs_free */
    STATS_INC_GROWN(cachep);                      /* cachep->grown++: now 1 */
    cachep->failures = 0;

    spin_unlock_irqrestore(&cachep->spinlock, save_flags);
    return 1;
opps1:
    kmem_freepages(cachep, objp);
failed:
    spin_lock_irqsave(&cachep->spinlock, save_flags);
    cachep->growing--;
    spin_unlock_irqrestore(&cachep->spinlock, save_flags);
    return 0;
}
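The colour handling above means each successive slab of a cache starts its objects (s_mem) at a slightly different offset, so equivalent objects in different slabs land in different L1 cache lines. A small user-space sketch of how the offset cycles; the colour=4, colour_off=32 values are hypothetical, chosen only to make the wrap-around visible (the size-32 cache in this trace has colour=1, so its offset is always 0):

#include <stdio.h>

int main(void)
{
    /* hypothetical cache with room for 4 colours of 32 bytes each */
    unsigned int colour = 4, colour_off = 32, colour_next = 0;

    for (int slab = 0; slab < 6; slab++) {
        /* same steps as in kmem_cache_grow() above */
        unsigned int offset = colour_next;
        colour_next++;
        if (colour_next >= colour)
            colour_next = 0;
        offset *= colour_off;

        printf("slab %d: objects start at page + mgmt area + %u bytes\n", slab, offset);
    }
    return 0;   /* prints offsets 0, 32, 64, 96, 0, 32 */
}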

3.4.1 kmem_cache_slabmgmt
Parameters: objp is the start of the page just allocated from ZONE_NORMAL, 0xc210b000;
colour_off=0x0, local_flags=0x1F0=GFP_KERNEL
static inline slab_t * kmem_cache_slabmgmt (kmem_cache_t *cachep,
            void *objp, int colour_off, int local_flags)
{
    slab_t *slabp;

    if (OFF_SLAB(cachep)) {
        /* Slab management obj is off-slab. */
        slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
        if (!slabp)
            return NULL;
    } else {
        slabp = objp+colour_off;    /* colour_off=0 here, so slabp is the start of the new page, 0xc210b000 */
        colour_off += L1_CACHE_ALIGN(cachep->num * sizeof(kmem_bufctl_t) + sizeof(slab_t)); /* colour_off = 0x80 = 128 afterwards */
    }
    slabp->inuse = 0;
    slabp->colouroff = colour_off;
    slabp->s_mem = objp+colour_off;
    /* afterwards: {list = {next = 0x0, prev = 0x0}, colouroff = 128, s_mem = 0xc210b080, inuse = 0, free = 0} */
    return slabp;
}

3.4.2 kmem_cache_init_objs
static inline void kmem_cache_init_objs (kmem_cache_t * cachep,
            slab_t * slabp, unsigned long ctor_flags)
{
    int i;

    for (i = 0; i < cachep->num; i++) {                /* cachep->num = 13 (DEBUG build) */
        void* objp = slabp->s_mem+cachep->objsize*i;   /* first object at 0xc210b080 */
        if (cachep->ctor)
            cachep->ctor(objp, cachep, ctor_flags);    /* cache_cache has no ctor, so this is skipped */
        slab_bufctl(slabp)[i] = i+1;
    }
    slab_bufctl(slabp)[i-1] = BUFCTL_END;          /* #define slab_bufctl(slabp) ((kmem_bufctl_t *)(((slab_t*)slabp)+1)) */
    slabp->free = 0;
}
After execution:
(gdb) p *slabp
$19 = {list = {next = 0x0, prev = 0x0}, colouroff = 128, s_mem = 0xc210b080, inuse = 0, free = 0}

(gdb) x /64wx slabp
0xc210b000:    0x00000000    0x00000000    0x00000080    0xc210b080
0xc210b010:    0x00000000    0x00000000    0x00000001    0x00000002
0xc210b020:    0x00000003    0x00000004    0x00000005    0x00000006
0xc210b030:    0x00000007    0x00000008    0x00000009    0x0000000a
0xc210b040:    0x0000000b    0x0000000c    0x0000000d    0x0000000e
0xc210b050:    0x0000000f    0xffffffff    0x00000000    0x00000000      // the last bufctl entry holds 0xFFFFFFFF (BUFCTL_END) instead of 0x00000010, marking the end
Note: each entry of slab_bufctl(slabp) stores the index of the next free object:
for example, entry 0 stores 1, meaning object 1 is the next free one; the last entry (index 15 here) stores 0xFFFFFFFF to mark the end of the chain.
3.3.1 kmem_cache_alloc_one_tail
On the second pass through kmem_cache_alloc_one, slabs_free is no longer empty and kmem_cache_alloc_one_tail is called:
static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep, slab_t *slabp)
{
    void *objp;
    /* slabp = 0xc210b000, containing:
     * {list = {next = 0xc02d05c8, prev = 0xc02d05c8}, colouroff = 128, s_mem = 0xc210b080, inuse = 0, free = 0} */
    STATS_INC_ALLOCED(cachep);
    STATS_INC_ACTIVE(cachep);
    STATS_SET_HIGH(cachep);

    slabp->inuse++;                                    /* slabp->inuse = 1 afterwards */
    objp = slabp->s_mem + slabp->free*cachep->objsize; /* slabp->free=0 here, so objp = 0xc210b080 */
    slabp->free=slab_bufctl(slabp)[slabp->free];       /* slabp->free becomes 1: each bufctl entry holds the index of the next free object */

    if (unlikely(slabp->free == BUFCTL_END)) {         /* free reached the end marker: the object just handed out was the last one in this slab */
        list_del(&slabp->list);                        /* so remove the slab from its current list */
        list_add(&slabp->list, &cachep->slabs_full);   /* and move it to slabs_full; the next kmem_cache_alloc_one then finds slabs_partial and slabs_free empty */
    }
    return objp;                                       /* returns 0xc210b080 */
}
The objp line above is what actually picks a kmem_cache_t object out of the slab; the original post illustrated this with a diagram, and a small user-space simulation of the same logic is sketched below.
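In place of the missing diagram: the free indices are threaded through the bufctl array, and objp is simply s_mem + free*objsize. A minimal simulation using the numbers from this trace (16 objects of 248 bytes, s_mem = 0xc210b080); this is an illustration, not the kernel code:

#include <stdio.h>

#define BUFCTL_END 0xffffffffu
#define NUM        16          /* objects per slab, as computed for cache_cache */
#define OBJSIZE    248

static unsigned int bufctl[NUM];   /* stands in for slab_bufctl(slabp)          */
static unsigned int free_idx;      /* stands in for slabp->free                 */
static unsigned long s_mem = 0xc210b080ul;  /* value observed in the gdb dump   */

static void init_objs(void)
{
    /* mirrors kmem_cache_init_objs(): entry i points to object i+1 */
    for (unsigned int i = 0; i < NUM; i++)
        bufctl[i] = i + 1;
    bufctl[NUM - 1] = BUFCTL_END;
    free_idx = 0;
}

static unsigned long alloc_one(void)
{
    if (free_idx == BUFCTL_END)
        return 0;                       /* slab is full; the kernel would have moved it to slabs_full */
    unsigned long objp = s_mem + (unsigned long)free_idx * OBJSIZE;
    free_idx = bufctl[free_idx];        /* follow the chain to the next free object */
    return objp;
}

int main(void)
{
    init_objs();
    for (int i = 0; i < 3; i++)
        printf("obj %d at 0x%lx\n", i, alloc_one());   /* 0xc210b080, 0xc210b178, 0xc210b270 */
    return 0;
}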




4. Summary
4.1 Layout of one slab page (0xc210b000 is the page obtained from the buddy allocator; the original post showed this as a diagram)

The L1_CACHE_ALIGN'ed management area at the start of the page holds the slab_t plus the slab_bufctl array, 128 bytes in total.
The slab_t takes 24 bytes, so the array has room for (128-24)/4 = 26 bufctl entries, i.e. this management area could describe up to 26 objects.
A kmem_cache_t itself is 248 bytes, however, and 4096/248 ≈ 16.5, so only 16 objects actually fit in the page and the bufctl capacity is never exhausted.

4.2 cache_cache after kmem_cache_sizes_init has finished:
(gdb) p cache_cache
$4 = {slabs_full = {next = 0xc210b000, prev = 0xc210b000}, slabs_partial = {next = 0xc210f000, prev = 0xc210f000}, slabs_free = {next = 0xc02cf7d0 <cache_cache+16>,
    prev = 0xc02cf7d0 <cache_cache+16>}, objsize = 248, flags = 4096, num = 16, spinlock = {lock = 1, magic = 3735899821}, batchcount = 0, gfporder = 0, gfpflags = 0, colour = 0,
  colour_off = 128, colour_next = 0, slabp_cache = 0x0, growing = 0, dflags = 1, ctor = 0x0, dtor = 0x0, failures = 0, name = "kmem_cache\000\000\000\000\000\000\000\000\000",
  next = {next = 0xc210f9a8, prev = 0xc210b0f0}, cpudata = {0x0 <repeats 32 times>}}

4.3 Memory layout of cache_cache once initialization is complete (shown as a diagram in the original post):

slabs_full holds the first page that was allocated; every kmem_cache_t in it is in use:
     size-32, size-32(DMA)  ------  size-4096, size-4096(DMA)
slabs_partial holds the second page (0xc210f000); only some of its kmem_cache_t objects are in use:
     size-8192, size-8192(DMA)  ------  size-131072, size-131072(DMA)

