kmem_cache_create解析(内存管理)-chinahhucai-ChinaUnix博客

love opensource

首页　| 　博文目录　| 　关于我

chinahhucai

博客访问： 1043920
博文数量： 123
博客积分： 5051
博客等级：大校
技术积分： 1356
用户组：普通用户
注册时间： 2008-07-14 10:56

文章分类

全部博文（123）

计算机图形学（6）
基本算法（1）
生活感悟（3）
程序员文章（2）

事业计划（0）

学习计划（0）
网络开发（3）
编程语言（13）

C（5）

C＋＋（4）

汇编程序（4）
linux系统应用（27）

Makefile应用（0）

shell编程（4）
linux设备驱动（11）

驱动实践（0）

设备驱动知识（11）
多核程序设计（3）
linux内核学习（20）
文章收藏（10）

linux C 编程（1）

linux内核分析（6）
java技术（16）

Android（10）
Linux心得（2）
未分配的博文（6）

文章存档

2012年（1）

2011年（21）

2010年（13）

2009年（55）

2008年（33）

我的朋友

相关博文

kmem_cache_create解析(内存管理)

分类： LINUX

2009-07-19 10:22:11

/**
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The alignment offset required for the objects.
 * @flags: SLAB flags for the cache.
 * @ctor: A constructor for the objects.
 *
 * Returns a pointer to the cache on success, NULL on failure (or calls
 * panic() on failure when SLAB_PANIC is set in @flags).
 * Triggers BUG() if @name is NULL, if called while in_interrupt(), or if
 * @size is below BYTES_PER_WORD or above KMALLOC_MAX_SIZE.
 *
 * The @ctor is run when new pages are successfully allocated for the cache
 * (per the original note; this function itself only stores the pointer).
 *
 * Values listed for @flags: %SLAB_POISON, %SLAB_RED_ZONE,
 * %SLAB_HWCACHE_ALIGN.
 */
struct kmem_cache *
kmem_cache_create (const char *name, size_t size, size_t align,
   unsigned long flags,
   void (*ctor)(void*, struct kmem_cache *, unsigned long))
{
   size_t left_over, slab_size, ralign;
   struct kmem_cache *cachep = NULL, *pc;

   /*
   * Sanity checks... these are all serious usage bugs.
   */
   if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
        size > KMALLOC_MAX_SIZE) {
       printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__,
               name);
       BUG();
   }

   /*
   * We use cache_chain_mutex to ensure a consistent view of
   * cpu_online_map as well. Please see cpuup_callback
   */
   mutex_lock(&cache_chain_mutex);

   /* Walk every cache already on cache_chain and reject a duplicate name. */
   list_for_each_entry(pc, &cache_chain, next) {
       char tmp;
       int res;

       /*
       * This happens when the module gets unloaded and doesn't
       * destroy its slab cache and no-one else reuses the vmalloc
       * area of the module. Print a warning.
       */
       /* A non-zero result is treated as "pc->name cannot be read"; skip it. */
       res = probe_kernel_address(pc->name, tmp);
       if (res) {
           printk(KERN_ERR
                   "SLAB: cache with size %d has lost its name\n",
                   pc->buffer_size);
           continue;
       }

       /* cachep is still NULL here, so the oops path returns NULL. */
       if (!strcmp(pc->name, name)) {
           printk(KERN_ERR
                   "kmem_cache_create: duplicate cache %s\n", name);
           dump_stack();
           goto oops;
       }
   }

#if DEBUG
   WARN_ON(strchr(name, ' '));   /* It confuses parsers */
#if FORCED_DEBUG
   /*
   * Enable redzoning and last user accounting, except for caches with
   * large objects, if the increased size would increase the object size
   * above the next power of two: caches with object sizes just above a
   * power of two have a significant amount of internal fragmentation.
   */
   if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
                       2 * sizeof(unsigned long long)))
       flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
   if (!(flags & SLAB_DESTROY_BY_RCU))
       flags |= SLAB_POISON;
#endif
   /* SLAB_DESTROY_BY_RCU and SLAB_POISON must not be combined. */
   if (flags & SLAB_DESTROY_BY_RCU)
       BUG_ON(flags & SLAB_POISON);
#endif
   /*
   * Always checks flags, a caller might be expecting debug support which
   * isn't available.
   */
   BUG_ON(flags & ~CREATE_MASK);

   /*
   * Check that size is in terms of words. This is needed to avoid
   * unaligned accesses for some archs when redzoning is used, and makes
   * sure any on-slab bufctl's are also correctly aligned.
   */
   /* Round size up to the next multiple of BYTES_PER_WORD. */
   if (size & (BYTES_PER_WORD - 1)) {
       size += (BYTES_PER_WORD - 1);
       size &= ~(BYTES_PER_WORD - 1);
   }

   /* calculate the final buffer alignment: */

   /* 1) arch recommendation: can be overridden for debug */
   if (flags & SLAB_HWCACHE_ALIGN) {
       /*
       * Default alignment: as specified by the arch code. Except if
       * an object is really small, then squeeze multiple objects into
       * one cacheline.
       */
       ralign = cache_line_size();
       /* Halve the alignment while at least two objects still fit in it. */
       while (size <= ralign / 2)
           ralign /= 2;
   } else {
       ralign = BYTES_PER_WORD;
   }

   /*
   * Redzoning and user store require word alignment or possibly larger.
   * Note this will be overridden by architecture or caller mandated
   * alignment if either is greater than BYTES_PER_WORD.
   */
   if (flags & SLAB_STORE_USER)
       ralign = BYTES_PER_WORD;

   if (flags & SLAB_RED_ZONE) {
       ralign = REDZONE_ALIGN;
       /* If redzoning, ensure that the second redzone is suitably
       * aligned, by adjusting the object size accordingly. */
       size += REDZONE_ALIGN - 1;
       size &= ~(REDZONE_ALIGN - 1);
   }

   /* 2) arch mandated alignment */
   if (ralign < ARCH_SLAB_MINALIGN) {
       ralign = ARCH_SLAB_MINALIGN;
   }
   /* 3) caller mandated alignment */
   if (ralign < align) {
       ralign = align;
   }
   /* disable debug if necessary */
   if (ralign > __alignof__(unsigned long long))
       flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
   /*
   * 4) Store it.
   */
   align = ralign;

   /* Get cache's description obj. */
   cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
   if (!cachep)
       goto oops;

#if DEBUG
   /* Remember the object size before any debug padding is added to size. */
   cachep->obj_size = size;

   /*
   * Both debugging options require word-alignment which is calculated
   * into align above.
   */
   if (flags & SLAB_RED_ZONE) {
       /* add space for red zone words */
       cachep->obj_offset += sizeof(unsigned long long);
       size += 2 * sizeof(unsigned long long);
   }
   if (flags & SLAB_STORE_USER) {
       /* user store requires one word storage behind the end of
       * the real object. But if the second red zone needs to be
       * aligned to 64 bits, we must allow that much space.
       */
       if (flags & SLAB_RED_ZONE)
           size += REDZONE_ALIGN;
       else
           size += BYTES_PER_WORD;
   }
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
   /* Pad qualifying objects out to a full page, shifting the object to the end. */
   if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
        && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) {
       cachep->obj_offset += PAGE_SIZE - size;
       size = PAGE_SIZE;
   }
#endif
#endif

   /*
   * Determine if the slab management is 'on' or 'off' slab.
   * (bootstrapping cannot cope with offslab caches so don't do
   * it too early on.)
   */
   if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init)
       /*
       * Size is large, assume best to place the slab management obj
       * off-slab (should allow better packing of objs).
       */
       flags |= CFLGS_OFF_SLAB;

   size = ALIGN(size, align);

   /* The check below relies on calculate_slab_order() having set cachep->num. */
   left_over = calculate_slab_order(cachep, size, align, flags);

   if (!cachep->num) {
       printk(KERN_ERR
               "kmem_cache_create: couldn't create cache %s.\n", name);
       kmem_cache_free(&cache_cache, cachep);
       cachep = NULL;
       goto oops;
   }
   /* Size of the management data: one bufctl per object plus struct slab. */
   slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
              + sizeof(struct slab), align);

   /*
   * If the slab has been placed off-slab, and we have enough space then
   * move it on-slab. This is at the expense of any extra colouring.
   */
   if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
       flags &= ~CFLGS_OFF_SLAB;
       left_over -= slab_size;
   }

   if (flags & CFLGS_OFF_SLAB) {
       /* really off slab. No need for manual alignment */
       slab_size =
            cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
   }

   cachep->colour_off = cache_line_size();
   /* Offset must be a multiple of the alignment. */
   if (cachep->colour_off < align)
       cachep->colour_off = align;
   /* Number of colour offsets that fit into the left-over space. */
   cachep->colour = left_over / cachep->colour_off;
   cachep->slab_size = slab_size;
   cachep->flags = flags;
   cachep->gfpflags = 0;
   if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
       cachep->gfpflags |= GFP_DMA;
   cachep->buffer_size = size;
   cachep->reciprocal_buffer_size = reciprocal_value(size);

   if (flags & CFLGS_OFF_SLAB) {
       /* Look up the general cache that will hold the off-slab management data. */
       cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
       /*
       * This is a possibility for one of the malloc_sizes caches.
       * But since we go off slab only for object size greater than
       * PAGE_SIZE/8, and malloc_sizes gets created in ascending order,
       * this should not happen at all.
       * But leave a BUG_ON for some lucky dude.
       */
       BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
   }
   cachep->ctor = ctor;
   /* Only the pointer is stored; the name string itself is not copied here. */
   cachep->name = name;

   /* A non-zero result from setup_cpu_cache() is treated as failure. */
   if (setup_cpu_cache(cachep)) {
       __kmem_cache_destroy(cachep);
       cachep = NULL;
       goto oops;
   }

   /* cache setup completed, link it into the list */
   list_add(&cachep->next, &cache_chain);
   /*
   * Common exit for success and failure: cachep is NULL on every failure
   * path. cache_chain_mutex, taken above, is released here.
   */
oops:
   if (!cachep && (flags & SLAB_PANIC))
       panic("kmem_cache_create(): failed to create slab `%s'\n",
              name);
   mutex_unlock(&cache_chain_mutex);
   return cachep;
}
EXPORT_SYMBOL(kmem_cache_create);

/*
 * kmem_cache_zalloc - allocate one object from cache @k with __GFP_ZERO set.
 * @k: cache to allocate from.
 * @flags: allocation flags supplied by the caller.
 *
 * Adds __GFP_ZERO to @flags and forwards to kmem_cache_alloc(); returns
 * whatever kmem_cache_alloc() returns.
 */
static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
{
    gfp_t zeroing_flags = flags | __GFP_ZERO;

    return kmem_cache_alloc(k, zeroing_flags);
}

/*
 * kmem_cache_alloc - allocate one object from @cachep.
 * @cachep: cache to allocate from.
 * @flags: allocation flags.
 *
 * Passes the return address of this function (i.e. its caller) down to
 * __cache_alloc() and returns __cache_alloc()'s result.
 */
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
    void *caller = __builtin_return_address(0);

    return __cache_alloc(cachep, flags, caller);
}

/*
 * __cache_alloc - common allocation path behind kmem_cache_alloc().
 * @cachep: cache to allocate from.
 * @flags: allocation flags; __GFP_ZERO requests a zero-filled object.
 * @caller: address handed to cache_alloc_debugcheck_after().
 *
 * Returns NULL immediately if should_failslab() returns non-zero. Otherwise
 * calls __do_cache_alloc() between local_irq_save()/local_irq_restore(),
 * passes the result through the debug post-check, and zeroes obj_size(cachep)
 * bytes when __GFP_ZERO is set and an object was obtained.
 */
static __always_inline void *
__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
{
    unsigned long irq_state;
    void *obj;

    if (should_failslab(cachep, flags))
        return NULL;

    cache_alloc_debugcheck_before(cachep, flags);

    local_irq_save(irq_state);
    obj = __do_cache_alloc(cachep, flags);
    local_irq_restore(irq_state);

    /* The debug check may hand back a different pointer; use its result. */
    obj = cache_alloc_debugcheck_after(cachep, flags, obj, caller);
    prefetchw(obj);

    if (unlikely(obj && (flags & __GFP_ZERO)))
        memset(obj, 0, obj_size(cachep));

    return obj;
}

/*
 * __do_cache_alloc - allocate one object from @cache.
 * @cache: cache to allocate from.
 * @flags: allocation flags, forwarded unchanged.
 *
 * Thin wrapper around ____cache_alloc(). The result must be returned:
 * __cache_alloc() uses this function's return value as the allocated object.
 */
static __always_inline void *
__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
{
    return ____cache_alloc(cache, flags);
}

/*
 * ____cache_alloc - take one object from the array cache of @cachep.
 * @cachep: cache to allocate from.
 * @flags: allocation flags, only used on the refill path.
 *
 * Works on the array_cache returned by cpu_cache_get(). If it holds no
 * available object, the request is handed to cache_alloc_refill(); otherwise
 * the last available entry is popped and the cache is marked as touched.
 */
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
    struct array_cache *local_ac;

    check_irq_off();

    local_ac = cpu_cache_get(cachep);

    /* Miss: nothing available in the local array cache, so refill it. */
    if (unlikely(!local_ac->avail)) {
        STATS_INC_ALLOCMISS(cachep);
        return cache_alloc_refill(cachep, flags);
    }

    /* Hit: pop the most recently stored entry. */
    STATS_INC_ALLOCHIT(cachep);
    local_ac->touched = 1;
    return local_ac->entry[--local_ac->avail];
}

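kmem_cache_create 通过 kmem_cache_zalloc → kmem_cache_alloc → __cache_alloc → ____cache_alloc 分配 cache 描述符；当本地高速缓存中没有可用对象时，最终会调用下面的重新填充函数 cache_alloc_refill：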

/*
 * cache_alloc_refill - refill the array cache of @cachep and return one object.
 * @cachep: cache whose array cache (from cpu_cache_get()) is empty.
 * @flags: allocation flags, passed to cache_grow() with GFP_THISNODE added.
 *
 * Refill order:
 *   1. transfer objects from l3->shared, if present;
 *   2. otherwise pull objects from slabs on the partial list, then the
 *      free list, until batchcount objects have been collected;
 *   3. if the array cache is still empty, call cache_grow() and retry.
 *
 * Returns the object popped from the refilled array cache, or NULL when
 * cache_grow() returns 0 and the array cache is still empty afterwards.
 */
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
    int batchcount;
    struct kmem_list3 *l3;
    struct array_cache *ac;
    int node;

    node = numa_node_id();

    check_irq_off();
    ac = cpu_cache_get(cachep);
retry:
    /*
     * batchcount is the chunk size used when the local array cache is
     * refilled or emptied.
     */
    batchcount = ac->batchcount;
    if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
        /*
         * If there was little recent activity on this cache, then
         * perform only a partial refill. Otherwise we could generate
         * refill bouncing.
         */
        batchcount = BATCHREFILL_LIMIT;
    }
    /* Fetch the slab lists of this cache for the current node. */
    l3 = cachep->nodelists[node];

    BUG_ON(ac->avail > 0 || !l3);
    spin_lock(&l3->list_lock);

    /* See if we can refill from the shared array */
    if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
        goto alloc_done;

    while (batchcount > 0) {
        struct list_head *entry;
        struct slab *slabp;
        /* Get slab alloc is to come from. */
        entry = l3->slabs_partial.next;
        /*
         * The partial list is empty: fall back to the free list. If
         * that is empty as well, leave the loop; cache_grow() is then
         * called below when the array cache is still empty.
         */
        if (entry == &l3->slabs_partial) {
            l3->free_touched = 1;
            entry = l3->slabs_free.next;
            if (entry == &l3->slabs_free)
                goto must_grow;
        }

        slabp = list_entry(entry, struct slab, list);
        check_slabp(cachep, slabp);
        check_spinlock_acquired(cachep);

        /*
         * The slab was either on partial or free list so
         * there must be at least one object available for
         * allocation.
         */
        BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num);

        /*
         * While this slab still has unused objects and the batch is
         * not complete, move objects into the array cache.
         */
        while (slabp->inuse < cachep->num && batchcount--) {
            STATS_INC_ALLOCED(cachep);
            STATS_INC_ACTIVE(cachep);
            STATS_SET_HIGH(cachep);

            ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
                                node);
        }
        check_slabp(cachep, slabp);

        /* move slabp to correct slabp list: */
        list_del(&slabp->list);
        if (slabp->free == BUFCTL_END)
            list_add(&slabp->list, &l3->slabs_full);
        else
            list_add(&slabp->list, &l3->slabs_partial);
    }

must_grow:
    /* Objects now held in the array cache no longer count as free in l3. */
    l3->free_objects -= ac->avail;
alloc_done:
    spin_unlock(&l3->list_lock);

    if (unlikely(!ac->avail)) {
        int x;
        /* Use cache_grow() to allocate a new slab for the cache. */
        x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);

        /* cache_grow can reenable interrupts, then ac could change. */
        ac = cpu_cache_get(cachep);
        if (!x && ac->avail == 0)    /* no objects in sight? abort */
            return NULL;

        if (!ac->avail)        /* objects refilled by interrupt? */
            goto retry;
    }
    ac->touched = 1;
    return ac->entry[--ac->avail];
}
(待续)

阅读(9183) | 评论(2) | 转发(0) |

上一篇：简单磁盘管理

下一篇：C++中的指针和const

给主人留下些什么吧！~~

QUNZI裙子qunziqunzi2015-11-21 15:21:41

渣渣

回复 | 举报

梦中的人在做梦2015-08-03 19:35:04

没技术含量的文章为什么叫解析？

回复 | 举报

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6