一.总体分析
二.代码分析
2.1 在mm/slab.c中L1535 kmalloc
-
/*
 * kmalloc - allocate `size` bytes from the general-purpose size caches.
 *
 * Walks the cache_sizes table until it finds the first entry whose cs_size
 * is not smaller than `size`, then allocates one object from that entry's
 * DMA or normal cache via __kmem_cache_alloc().
 * Returns NULL when the walk reaches the entry with cs_size == 0 without
 * finding a size class that fits.
 */
void * kmalloc (size_t size, int flags)
-
{
-
cache_sizes_t *csizep = cache_sizes;
-
// Scan cache_sizes for the first entry with cs_size >= size
-
for (; csizep->cs_size; csizep++) {
-
if (size > csizep->cs_size)
-
continue;
-
// Each entry carries both cs_cachep and cs_dmacachep; the GFP_DMA bit in flags selects which one is used
-
return __kmem_cache_alloc(flags & GFP_DMA ? csizep->cs_dmacachep : csizep->cs_cachep, flags); // see 2.2
-
}
-
return NULL;
-
}
start_kernel-->vfs_caches_init-->mnt_init-->init_mount_tree-->do_kern_mount-->set_devname
size=7("rootfs\0",即 strlen(name)+1), flags=GFP_KERNEL
2.2
在mm/slab.c中L1318 kmalloc-->__kmem_cache_alloc
-
/*
 * __kmem_cache_alloc - allocate one object from `cachep`.
 *
 * With local interrupts disabled, the object is taken either from the
 * per-CPU array (CONFIG_SMP, when cc_data() is non-NULL) or through the
 * kmem_cache_alloc_one() macro. When no object is available, control
 * reaches alloc_new_slab / alloc_new_slab_nolock, interrupts are restored,
 * kmem_cache_grow() is called, and on success the allocation is retried
 * from try_again. Returns NULL when kmem_cache_grow() returns 0.
 */
static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
-
{
-
unsigned long save_flags;
-
void* objp;
-
// Author's note: this only sanity-checks the flags and has no other effect (helper body is not shown in this listing)
-
kmem_cache_alloc_head(cachep, flags);
-
try_again:
-
local_irq_save(save_flags);
-
#ifdef CONFIG_SMP
-
{
-
cpucache_t *cc = cc_data(cachep); // trace: cc == 0x0 after this line
-
-
if (cc) {
-
if (cc->avail) {
-
STATS_INC_ALLOCHIT(cachep);
-
objp = cc_entry(cc)[--cc->avail];
-
} else {
-
STATS_INC_ALLOCMISS(cachep);
-
objp = kmem_cache_alloc_batch(cachep,cc,flags);
-
if (!objp)
-
goto alloc_new_slab_nolock;
-
}
-
} else {
-
spin_lock(&cachep->spinlock); // SMP path without a per-CPU array: take the cache spinlock
-
objp = kmem_cache_alloc_one(cachep); // see 2.3: the macro jumps to alloc_new_slab when slabs_free has no slab either
-
spin_unlock(&cachep->spinlock); // NOTE(review): original note is garbled; it seems to say the slab sits on slabs_free after alloc_new_slab - confirm
-
}
-
}
-
#else
-
objp = kmem_cache_alloc_one(cachep);
-
#endif
-
local_irq_restore(save_flags);
-
return objp;
-
alloc_new_slab: // kmem_cache_alloc_one is a macro; it jumps here when slabs_free is empty as well
-
#ifdef CONFIG_SMP
-
spin_unlock(&cachep->spinlock);
-
alloc_new_slab_nolock:
-
#endif
-
local_irq_restore(save_flags);
-
if (kmem_cache_grow(cachep, flags)) // see 2.4
-
goto try_again; // a slab was added: jump back to try_again above and retry
-
return NULL;
-
}
2.3 这个宏(kmem_cache_alloc_one)在本次分配中会被执行两次:第1次时slabs_partial和slabs_free都是空的,所以要goto alloc_new_slab;
第2次时(kmem_cache_grow之后)slabs_free不为空,可以从中取出slab并分配到对象(obj)
-
/*
 * kmem_cache_alloc_one(cachep) - pick a slab that still has a free object
 * and allocate from it with kmem_cache_alloc_one_tail().
 *
 * Takes the first slab on slabs_partial; if that list is empty, takes the
 * first slab on slabs_free and moves it to slabs_partial; if both lists are
 * empty it jumps to the alloc_new_slab label, which must exist in the
 * function that expands the macro.
 * NOTE(review): the line-continuation backslashes of the macro are missing
 * in this listing.
 */
#define kmem_cache_alloc_one(cachep)
-
({
-
struct list_head * slabs_partial, * entry;
-
slab_t *slabp;
-
slabs_partial = &(cachep)->slabs_partial; // head node of the slabs_partial list
-
entry = slabs_partial->next; // first pass of the trace: the list is empty, so next points back at the head (entry == slabs_partial, not NULL)
-
if (unlikely(entry == slabs_partial)) { // is slabs_partial empty? It is on the first pass
-
struct list_head * slabs_free;
-
slabs_free = &(cachep)->slabs_free; // head node of the slabs_free list
-
entry = slabs_free->next; // first pass of the trace: slabs_free is empty too, so entry == slabs_free (not NULL)
-
if (unlikely(entry == slabs_free)) // is slabs_free empty? It is on the first pass
-
goto alloc_new_slab; // so control jumps to alloc_new_slab
-
list_del(entry);
-
list_add(entry, slabs_partial);
-
}
-
-
slabp = list_entry(entry, slab_t, list);
-
kmem_cache_alloc_one_tail(cachep, slabp); // see 2.3.1
-
})
在函数kmalloc中查找到的cachep是size-32的这个cachep
-
(gdb) p *cachep 地址=0xc210b080
-
$6 = {slabs_full = {next = 0xc210b080, prev = 0xc210b080}, slabs_partial = {next = 0xc210b088, prev = 0xc210b088}, slabs_free = {next = 0xc210b090, prev = 0xc210b090},
-
objsize =0x40=64, flags = 0x22000, num = 58, spinlock = {lock = 0, magic = 0xdead4ead}, batchcount = 0, gfporder = 0, gfpflags = 0, colour = 1, colour_off = 128, colour_next = 0,
-
slabp_cache = 0x0, growing = 0, dflags = 0, ctor = 0x0, dtor = 0x0, failures = 0, name = "size-32", '\000' <repeats 12 times>, next = {next = 0xc02cf830 <cache_cache+112>,
-
prev = 0xc210b1e8}, cpudata = {0x0 <repeats 32 times>}}
2.4 分配内存并初始化slabp
-
/*
 * kmem_cache_grow - add one new slab to `cachep`.
 *
 * Picks the colour offset for the new slab and bumps cachep->growing under
 * the cache spinlock, obtains pages with kmem_getpages(), builds the slab
 * descriptor with kmem_cache_slabmgmt(), tags every page of the slab with
 * the cache and slab, initialises the objects, and finally appends the slab
 * to cachep->slabs_free.
 * Returns 1 on success, 0 on failure (growing is decremented again on the
 * failure paths; the pages are released if the descriptor setup failed).
 * Values marked "trace:" are the author's gdb observations for this run.
 */
static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
-
{
-
slab_t *slabp;
-
struct page *page;
-
void *objp;
-
size_t offset;
-
unsigned int i, local_flags;
-
unsigned long ctor_flags;
-
unsigned long save_flags;
-
-
... // some checks omitted by the author
-
-
ctor_flags = SLAB_CTOR_CONSTRUCTOR; // trace: ctor_flags == 0x1 afterwards
-
local_flags = (flags & SLAB_LEVEL_MASK); // trace: local_flags == 0x1f0 afterwards
-
if (local_flags == SLAB_ATOMIC)
-
ctor_flags |= SLAB_CTOR_ATOMIC;
-
-
/* About to mess with non-constant members - lock. */
-
spin_lock_irqsave(&cachep->spinlock, save_flags);
-
-
/* Get colour for the slab, and cal the next value. */
-
offset = cachep->colour_next; // trace: colour_next == 0x0 here, so offset == 0x0
-
cachep->colour_next++; // trace: colour_next == 0x1 afterwards
-
if (cachep->colour_next >= cachep->colour) // trace: colour_next == 0x1, colour == 0x1
-
cachep->colour_next = 0; // trace: colour_next wraps back to 0x0
-
offset *= cachep->colour_off; // trace: offset was 0x0, so it stays 0x0
-
cachep->dflags |= DFLGS_GROWN; // trace: dflags == 0x01 afterwards
-
-
cachep->growing++; // trace: growing goes from 0x0 to 0x1
-
spin_unlock_irqrestore(&cachep->spinlock, save_flags);
-
-
/* Get mem for the objs. */
-
if (!(objp = kmem_getpages(cachep, flags))) // trace: one page from ZONE_NORMAL at 0xc210c000
-
goto failed;
-
// Set up the slab management area --> arguments in this trace: offset=0x0, local_flags=0x1f0
-
// After the call --> p *slabp = 0xc210c000 = {list = {next = 0x0, prev = 0x0}, colouroff =256=0x100, s_mem = 0xc210c100, inuse = 0, free = 0}
-
if (!(slabp = kmem_cache_slabmgmt(cachep, objp, offset, local_flags))) // see 2.4.1; trace: slabp == 0xc210c000 afterwards; sizes the slab management area
-
goto opps1;
-
-
/* I hope this is OK. */
-
i = 1 << cachep->gfporder; // trace: gfporder == 0x0, so i == 0x1
-
page = virt_to_page(objp); // map the virtual address objp to its struct page in mem_map
-
do {
-
SET_PAGE_CACHE(page, cachep); // author's note: page->list.next = cachep
-
SET_PAGE_SLAB(page, slabp); // author's note: page->list.prev = slabp
-
PageSetSlab(page); // author's note: sets PG_slab in page->flags
-
page++;
-
} while (--i);
-
-
kmem_cache_init_objs(cachep, slabp, ctor_flags); // see 2.4.2: initialise the slab management area
-
spin_lock_irqsave(&cachep->spinlock, save_flags);
-
cachep->growing--; // trace: growing == 0x0 afterwards
-
-
/* Make slab active. */
-
list_add_tail(&slabp->list, &cachep->slabs_free); // append the newly obtained slab to slabs_free
-
STATS_INC_GROWN(cachep); // author's note: cachep->grown++; trace: grown == 0x01 afterwards
-
cachep->failures = 0;
-
-
spin_unlock_irqrestore(&cachep->spinlock, save_flags);
-
return 1;
-
opps1:
-
kmem_freepages(cachep, objp);
-
failed:
-
spin_lock_irqsave(&cachep->spinlock, save_flags);
-
cachep->growing--;
-
spin_unlock_irqrestore(&cachep->spinlock, save_flags);
-
return 0;
-
}
2.4.1 按照cachep->num计算colour_off的值,即slab管理区的大小
参数说明: objp是刚从zone_normal中分配的一页内存的首地址0xc210c000
colour_off=0x0, local_flags=0x1F0=GFP_KERNEL
-
/*
 * kmem_cache_slabmgmt - obtain and fill in the slab descriptor (slab_t).
 *
 * cachep:      cache the new slab belongs to
 * objp:        start address of the memory obtained for the slab
 * colour_off:  colour offset of this slab on entry
 * local_flags: allocation flags, used only for the off-slab descriptor
 *
 * Off-slab caches allocate the descriptor from cachep->slabp_cache;
 * otherwise the descriptor is placed at objp + colour_off and colour_off is
 * advanced past the descriptor and its bufctl array. Sets inuse, colouroff
 * and s_mem. Returns the descriptor, or NULL if the off-slab allocation
 * fails.
 */
static inline slab_t * kmem_cache_slabmgmt (kmem_cache_t *cachep,
-
void *objp, int colour_off, int local_flags)
-
{
-
slab_t *slabp;
-
-
if (OFF_SLAB(cachep)) {
-
/* Slab management obj is off-slab. */
-
slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
-
if (!slabp)
-
return NULL;
-
} else {
-
// colour_off grows with cachep->num (one kmem_bufctl_t per object); it is not a fixed single L1_CACHE_ALIGN unit
-
slabp = objp+colour_off; // trace: colour_off == 0x0 before, so slabp is the start of the page, 0xc210c000
-
colour_off += L1_CACHE_ALIGN(cachep->num * sizeof(kmem_bufctl_t) + sizeof(slab_t)); // trace: colour_off == 256 (bytes) afterwards
-
}
-
slabp->inuse = 0;
-
slabp->colouroff = colour_off;
-
slabp->s_mem = objp+colour_off;
-
// trace, after these stores: {list = {next = 0x0, prev = 0x0}, colouroff = 256, s_mem = 0xc210c100, inuse = 0, free = 0}
-
return slabp;
-
}
2.4.2 初始化slab的管理区
-
/*
 * kmem_cache_init_objs - initialise every object of a new slab and build
 * its free-object index chain.
 *
 * For each of the cachep->num objects the constructor is called (when the
 * cache has one) and bufctl[i] is set to i+1, so each entry names the next
 * free object. The last entry is then overwritten with BUFCTL_END and
 * slabp->free is set to 0, i.e. object 0 is the first one handed out.
 */
static inline void kmem_cache_init_objs (kmem_cache_t * cachep,
-
slab_t * slabp, unsigned long ctor_flags)
-
{
-
int i;
-
-
for (i = 0; i < cachep->num; i++) { // trace: cachep->num == 58
-
void* objp = slabp->s_mem+cachep->objsize*i; // trace: slabp->s_mem == 0xc210c100
-
if (cachep->ctor)
-
cachep->ctor(objp, cachep, ctor_flags); // not executed in this trace (ctor == 0x0)
-
slab_bufctl(slabp)[i] = i+1;
-
}
-
slab_bufctl(slabp)[i-1] = BUFCTL_END; // #define slab_bufctl(slabp) ((kmem_bufctl_t *)(((slab_t*)slabp)+1))
-
slabp->free = 0;
-
}
执行后:
-
(gdb) p *slabp = 0xc210c000
-
$15 = {list = {next = 0x0, prev = 0x0}, colouroff = 256, s_mem = 0xc210c100, inuse = 0, free = 0}
-
(gdb) x /128wx slabp
-
0xc210c000: 0x00000000 0x00000000 0x00000100 0xc210c100
-
0xc210c010: 0x00000000 0x00000000 0x00000001 0x00000002
-
0xc210c020: 0x00000003 0x00000004 0x00000005 0x00000006
-
0xc210c030: 0x00000007 0x00000008 0x00000009 0x0000000a
-
0xc210c040: 0x0000000b 0x0000000c 0x0000000d 0x0000000e
-
0xc210c050: 0x0000000f 0x00000010 0x00000011 0x00000012
-
0xc210c060: 0x00000013 0x00000014 0x00000015 0x00000016
-
0xc210c070: 0x00000017 0x00000018 0x00000019 0x0000001a
-
0xc210c080: 0x0000001b 0x0000001c 0x0000001d 0x0000001e
-
0xc210c090: 0x0000001f 0x00000020 0x00000021 0x00000022
-
0xc210c0a0: 0x00000023 0x00000024 0x00000025 0x00000026
-
0xc210c0b0: 0x00000027 0x00000028 0x00000029 0x0000002a
-
0xc210c0c0: 0x0000002b 0x0000002c 0x0000002d 0x0000002e
-
0xc210c0d0: 0x0000002f 0x00000030 0x00000031 0x00000032
-
0xc210c0e0: 0x00000033 0x00000034 0x00000035 0x00000036
-
0xc210c0f0: 0x00000037 0x00000038 0x00000039 0xffffffff
-
0xc210c100: 0x00000000 0x00000000 0x00000000 0x00000000
对比kmem_cache_create时的slabp
-
(gdb) p *slabp
$19 = {list = {next = 0x0, prev = 0x0}, colouroff = 128, s_mem = 0xc210b080, inuse = 0, free = 0}
-
-
(gdb) x /64wx slabp
-
0xc210b000: 0x00000000 0x00000000 0x00000080 0xc210b080
-
0xc210b010: 0x00000000 0x00000000 0x00000001 0x00000002
-
0xc210b020: 0x00000003 0x00000004 0x00000005 0x00000006
-
0xc210b030: 0x00000007 0x00000008 0x00000009 0x0000000a
-
0xc210b040: 0x0000000b 0x0000000c 0x0000000d 0x0000000e
-
0xc210b050: 0x0000000f 0xffffffff 0x00000000 0x00000000 //由原先的0x00000010-->0xFFFFFFFF代表是END
注意:slab_bufctl(slabp)中存的是下一个空闲项的下标:
例如第0项存的是1,代表下一个是第1项; 最后一项(上面kmem_cache_create的dump中是第15项,即第16个)存的是0xFFFFFFFF(BUFCTL_END)代表结束
2.3.1
下面是第2次进入kmem_cache_alloc_one时会调用kmem_cache_alloc_one_tail
-
/*
 * kmem_cache_alloc_one_tail - take the next free object out of `slabp`.
 *
 * Bumps the statistics and slabp->inuse, computes the object address from
 * s_mem, the current free index and the object size, then advances
 * slabp->free to the index stored in the bufctl entry. When that index is
 * BUFCTL_END the slab has no free objects left and is moved to
 * cachep->slabs_full. Returns the address of the allocated object.
 * Values marked "trace:" are the author's gdb observations for this run.
 */
static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep, slab_t *slabp)
-
{
-
void *objp;
-
// trace: slabp == 0xc210c000
-
// trace: *slabp = {list = {next = 0xc210b088, prev = 0xc210b088}, colouroff = 256, s_mem = 0xc210c100, inuse = 1, free = 0} (NOTE(review): inuse is already 1 here, so the snapshot was presumably taken after the inuse++ below)
-
STATS_INC_ALLOCED(cachep);
-
STATS_INC_ACTIVE(cachep);
-
STATS_SET_HIGH(cachep);
-
-
slabp->inuse++; // trace: slabp->inuse == 1 afterwards
-
objp = slabp->s_mem + slabp->free*cachep->objsize; // trace: slabp->free == 0, so objp == 0xc210c100
-
slabp->free=slab_bufctl(slabp)[slabp->free]; // trace: slabp->free == 1 afterwards; the bufctl entry holds the index of the next free object
-
-
if (unlikely(slabp->free == BUFCTL_END)) { // free index reached the end marker: nothing left (e.g. object 57, the last one of this slab, was just handed out)
-
list_del(&slabp->list); // unlink the slab from the list it is currently on,
-
list_add(&slabp->list, &cachep->slabs_full); // and put it on slabs_full. Author's note: the next kmem_cache_alloc_one then finds slabs_free and slabs_partial both empty.
-
}
-
return objp; // trace: returns 0xc210c100
-
}
-
解释对象地址的获取,即源码中计算objp的那一句(objp = slabp->s_mem + slabp->free*cachep->objsize),如下图所示:

2.5 总结
将上图中写0的黄色小框的地址0xc210c100返回,那么这个0xc210c100就是kmalloc申请到的地址。
下一次再在size-32中申请时,会返回写1的黄色小框的地址,
....
直到返回写57的黄色小框的地址之后,已经全部都占用了,就把cachep->slabs_partial这个list中指向这一页的结点删掉,插入到cachep->slabs_full队列中。
最终形成了如下的链表
阅读(1659) | 评论(0) | 转发(0) |