上篇分析了一下cache的创建过程,现在cache已经创建完成,跟踪一下slab对象的申请过程。
目前使用的申请方式主要是kmalloc(从general cache中申请)和kmem_cache_alloc(从专用cache中申请)。
先看一下kmalloc的实现:
-
/*
 * kmalloc - allocate memory from the general-purpose caches.
 * @size: number of bytes requested.
 * @flags: allocation flags; GFP_DMA selects the DMA variant of the cache
 *         when CONFIG_ZONE_DMA is enabled.
 *
 * When @size is a compile-time constant the index into malloc_sizes[] is
 * resolved inline by walking the CACHE(x) entries of
 * <linux/kmalloc_sizes.h>; any other request is handed to __kmalloc().
 *
 * Return: ZERO_SIZE_PTR for a constant @size of 0, NULL when a constant
 * @size is larger than every CACHE(x) entry, otherwise the pointer
 * produced by the underlying allocator.
 */
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
	struct kmem_cache *cachep;
	void *ret;
	int idx = 0;

	/*
	 * __builtin_constant_p() is a GCC builtin that evaluates to 1 when
	 * its argument is a compile-time constant.  Without a constant size
	 * there is nothing to resolve inline, so use the regular allocator.
	 */
	if (!__builtin_constant_p(size))
		return __kmalloc(size, flags);

	if (size == 0)
		return ZERO_SIZE_PTR;

	/*
	 * Each CACHE(x) line pulled in from kmalloc_sizes.h becomes one step
	 * of a linear search: stop at the first entry that can hold @size,
	 * otherwise advance idx to the next entry.
	 * NOTE(review): presumably the header lists sizes in ascending
	 * order, matching malloc_sizes[] - confirm against the header.
	 */
#define CACHE(x) \
	if (size <= (x)) \
		goto found; \
	idx++;
#include <linux/kmalloc_sizes.h>
#undef CACHE

	/* No CACHE(x) entry was large enough for this request. */
	return NULL;

found:
	cachep = malloc_sizes[idx].cs_cachep;
#ifdef CONFIG_ZONE_DMA
	if (flags & GFP_DMA)
		cachep = malloc_sizes[idx].cs_dmacachep;
#endif

	ret = kmem_cache_alloc_notrace(cachep, flags);

	trace_kmalloc(_THIS_IP_, ret,
		      size, slab_buffer_size(cachep), flags);

	return ret;
}
-
/*
 * __kmalloc - out-of-line allocation entry point used by kmalloc().
 * @size: number of bytes requested.
 * @flags: allocation flags.
 *
 * Forwards to __do_kmalloc() without caller information (NULL).
 */
void *__kmalloc(size_t size, gfp_t flags)
{
	void *obj = __do_kmalloc(size, flags, NULL);

	return obj;
}
-
/**
-
* __do_kmalloc - allocate memory
-
* @size: how many bytes of memory are required.
-
* @flags: the type of memory to allocate (see kmalloc).
-
* @caller: function caller for debug tracking of the caller
-
*/
-
static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
-
void *caller)
-
{
-
struct kmem_cache *cachep;
-
void *ret;
-
-
/* If you want to save a few bytes .text space: replace
-
* __ with kmem_.
-
* Then kmalloc uses the uninlined functions instead of the inline
-
* functions.
-
*/
-
/* 根据size大小,查找对应的general cache */
-
cachep = __find_general_cachep(size, flags);
-
/* 对于0size的kmalloc请求,直接返回cache的地址 */
-
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
-
return cachep;
-
/* 具体分配在这里进行 */
-
ret = __cache_alloc(cachep, flags, caller);
-
-
trace_kmalloc((unsigned long) caller, ret,
-
size, cachep->buffer_size, flags);
-
-
return ret;
-
}
而__cache_alloc函数中实际上调用的是__do_cache_alloc。对于非NUMA架构,它直接调用____cache_alloc,代码如下:
-
static __always_inline void *
-
__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
-
{
-
return ____cache_alloc(cachep, flags);
-
}
-
/*
 * ____cache_alloc - take one object from the current CPU's array cache.
 * @cachep: cache to allocate from.
 * @flags: allocation flags, forwarded to cache_alloc_refill() on a miss.
 *
 * Fast path: pop the most recently stored pointer from the array cache.
 * Slow path: the array cache is empty, so cache_alloc_refill() is asked
 * to refill it and hand back one object.
 *
 * Return: the object, or NULL when the refill could not obtain one.
 */
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	void *objp;
	struct array_cache *ac;

	check_irq_off();

	/* Array cache belonging to the CPU we are running on. */
	ac = cpu_cache_get(cachep);
	if (likely(ac->avail)) {
		/* Hit: account for it and mark the array as recently used. */
		STATS_INC_ALLOCHIT(cachep);
		ac->touched = 1;
		/* Entries are consumed from the end; avail == 0 means empty. */
		objp = ac->entry[--ac->avail];
	} else {
		/* Miss: refill the array from the slab lists. */
		STATS_INC_ALLOCMISS(cachep);
		objp = cache_alloc_refill(cachep, flags);
		/*
		 * cache_alloc_refill() may go through cache_grow(), which
		 * can re-enable interrupts, so the array cache it ended up
		 * working on is not necessarily the one fetched above.
		 * Re-read it so kmemleak_erase() below is handed the slot
		 * that really held the returned object.
		 */
		ac = cpu_cache_get(cachep);
	}

	/*
	 * To avoid a false negative, if an object that is in one of the
	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
	 * treat the array pointers as a reference to the object.
	 * Nothing was handed out when objp is NULL, so there is no slot
	 * to clear in that case.
	 */
	if (objp)
		kmemleak_erase(&ac->entry[ac->avail]);

	return objp;
}
-
/*
 * cache_alloc_refill - refill the current CPU's array cache and return
 * one object from it.
 * @cachep: cache whose array cache ran empty.
 * @flags: allocation flags, used if a new slab has to be created.
 *
 * Sources are tried in this order: the node's shared array cache, slabs
 * on the partial list, slabs on the free list, and finally a freshly
 * grown slab (followed by a retry of the whole sequence).
 *
 * Return: one object, or NULL when no object is available and
 * cache_grow() reported failure.
 */
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
	struct array_cache *ac;
	struct kmem_list3 *l3;
	int batchcount;
	int node;

retry:
	check_irq_off();

	/* Current NUMA node and this CPU's array cache. */
	node = numa_node_id();
	ac = cpu_cache_get(cachep);

	/* Number of objects to pull in with one refill. */
	batchcount = ac->batchcount;
	/*
	 * If there was little recent activity on this cache, then
	 * perform only a partial refill. Otherwise we could generate
	 * refill bouncing.
	 */
	if (!ac->touched && batchcount > BATCHREFILL_LIMIT)
		batchcount = BATCHREFILL_LIMIT;

	/* Slab lists (full/partial/free) of this memory node. */
	l3 = cachep->nodelists[node];

	BUG_ON(ac->avail > 0 || !l3);
	spin_lock(&l3->list_lock);

	/*
	 * See if we can refill from the shared array first; it is shared
	 * by the CPUs of this node.
	 */
	if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
		goto alloc_done;

	/* Shared array absent or empty: take objects from the slab lists. */
	while (batchcount > 0) {
		struct list_head *pos;
		struct slab *slabp;

		/* Get slab alloc is to come from: partial slabs first. */
		pos = l3->slabs_partial.next;
		if (pos == &l3->slabs_partial) {
			/* Partial list is empty; record that the free list was used. */
			l3->free_touched = 1;
			pos = l3->slabs_free.next;
			/* Free list is empty as well: a new slab is needed. */
			if (pos == &l3->slabs_free)
				goto must_grow;
		}

		slabp = list_entry(pos, struct slab, list);
		check_slabp(cachep, slabp);
		check_spinlock_acquired(cachep);

		/*
		 * The slab was either on partial or free list so
		 * there must be at least one object available for
		 * allocation.
		 */
		BUG_ON(slabp->inuse >= cachep->num);

		/*
		 * Drain this slab until it is fully in use or the batch is
		 * satisfied.  batchcount is only decremented while the slab
		 * still has room (short-circuit evaluation).
		 */
		while (slabp->inuse < cachep->num && batchcount--) {
			void *obj;

			STATS_INC_ALLOCED(cachep);
			STATS_INC_ACTIVE(cachep);
			STATS_SET_HIGH(cachep);

			/* Pull one free object out of the slab into the array cache. */
			obj = slab_get_obj(cachep, slabp, node);
			ac->entry[ac->avail++] = obj;
		}
		check_slabp(cachep, slabp);

		/*
		 * move slabp to correct slabp list: full when no free object
		 * is left in it, partial otherwise.
		 */
		list_del(&slabp->list);
		list_add(&slabp->list,
			 slabp->free == BUFCTL_END ? &l3->slabs_full
						   : &l3->slabs_partial);
	}

must_grow:
	/* ac->avail objects left the slab lists; update the node's free count. */
	l3->free_objects -= ac->avail;
alloc_done:
	spin_unlock(&l3->list_lock);

	/* Nothing could be gathered from the existing slabs: create one. */
	if (unlikely(ac->avail == 0)) {
		int grown = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);

		/*
		 * cache_grow can reenable interrupts, then ac could change,
		 * so fetch the array cache again.
		 */
		ac = cpu_cache_get(cachep);
		if (ac->avail == 0) {
			/* no objects in sight? abort */
			if (!grown)
				return NULL;
			/* A slab was added: run the refill again. */
			goto retry;
		}
		/* Otherwise objects were refilled in the meantime (by interrupt). */
	}

	/* Mark the array cache as recently used and hand out one object. */
	ac->touched = 1;
	return ac->entry[--ac->avail];
}
下面分析一下上面用到的两个函数:transfer_objects和slab_get_obj。
-
/*
-
* Transfer objects in one arraycache to another.
-
* Locking must be handled by the caller.
-
*
-
* Return the number of entries transferred.
-
*/
-
/* 从shared local cache中移动对象到local cache中,shared local cache 被同一NUMA节点的CPU所共享 */
-
static int transfer_objects(struct array_cache *to,
-
struct array_cache *from, unsigned int max)
-
{
-
/* Figure out how many entries to transfer */
-
int nr = min(min(from->avail, max), to->limit - to->avail);
-
-
if (!nr)
-
return 0;
-
/* 拷贝并更新相关成员 */
-
memcpy(to->entry + to->avail, from->entry + from->avail -nr,
-
sizeof(void *) *nr);
-
-
from->avail -= nr;
-
to->avail += nr;
-
to->touched = 1;
-
return nr;
-
}
-
static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
-
int nodeid)
-
{
-
/* 获取空闲对象,free是slabp中第一个空闲对象索引 */
-
/* index_to_obj: slab->s_mem + cache->buffer_size * idx; s_mem是slab中第一个对象的起始地址,buffer_size是每个对象的大小*/
-
void *objp = index_to_obj(cachep, slabp, slabp->free);
-
kmem_bufctl_t next;
-
/* 更新当前slab中活跃对象的数量 */
-
slabp->inuse++;
-
/* 获取下一个空闲对象的索引 */
-
/* slab_bufctl: (kmem_bufctl_t *) (slabp + 1) */
-
next = slab_bufctl(slabp)[slabp->free];
-
#if DEBUG
-
slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
-
WARN_ON(slabp->nodeid != nodeid);
-
#endif
-
/* 指向下一个空闲对象索引 */
-
slabp->free = next;
-
-
return objp;
-
}
阅读(1944) | 评论(0) | 转发(2) |