Chinaunix首页 | 论坛 | 博客
  • 博客访问: 1036336
  • 博文数量: 123
  • 博客积分: 5051
  • 博客等级: 大校
  • 技术积分: 1356
  • 用 户 组: 普通用户
  • 注册时间: 2008-07-14 10:56
文章分类
文章存档

2012年(1)

2011年(21)

2010年(13)

2009年(55)

2008年(33)

分类: LINUX

2009-07-19 10:22:11

/**
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of the objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags for this cache.
 * @ctor: A constructor for the objects.
 *
 * Returns a pointer to the cache on success, NULL on failure (unless
 * %SLAB_PANIC is set in @flags, in which case a failure panics instead).
 * Must not be called from interrupt context: in_interrupt() is treated as
 * a usage bug below.  Takes cache_chain_mutex for the whole operation.
 *
 * @ctor is only stored in the cache descriptor here; it is meant to be run
 * when new page frames are successfully allocated for the cache.
 *
 * Values accepted in @flags include %SLAB_POISON, %SLAB_RED_ZONE and
 * %SLAB_HWCACHE_ALIGN.  Any bit outside CREATE_MASK triggers BUG_ON().
 */
struct kmem_cache *
kmem_cache_create (const char *name, size_t size, size_t align,
    unsigned long flags,
    void (*ctor)(void*, struct kmem_cache *, unsigned long))
{
    size_t left_over, slab_size, ralign;
    struct kmem_cache *cachep = NULL, *pc;

    /*
     * Sanity checks... these are all serious usage bugs.
     * Note that name may be NULL when the message below is printed.
     */
    if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
        size > KMALLOC_MAX_SIZE) {
        printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__,
                name);
        BUG();
    }

    /*
     * We use cache_chain_mutex to ensure a consistent view of
     * cpu_online_map as well.  Please see cpuup_callback
     */
    mutex_lock(&cache_chain_mutex);

    /* Walk every existing cache and refuse to create a duplicate name. */
    list_for_each_entry(pc, &cache_chain, next) {
        char tmp;
        int res;

        /*
         * This happens when the module gets unloaded and doesn't
         * destroy its slab cache and no-one else reuses the vmalloc
         * area of the module.  Print a warning.
         */
        res = probe_kernel_address(pc->name, tmp);
        if (res) {
            printk(KERN_ERR
                   "SLAB: cache with size %d has lost its name\n",
                   pc->buffer_size);
            continue;
        }

        if (!strcmp(pc->name, name)) {
            printk(KERN_ERR
                   "kmem_cache_create: duplicate cache %s\n", name);
            dump_stack();
            /* cachep is still NULL here, so NULL is returned. */
            goto oops;
        }
    }

#if DEBUG
    WARN_ON(strchr(name, ' '));    /* It confuses parsers */
#if FORCED_DEBUG
    /*
     * Enable redzoning and last user accounting, except for caches with
     * large objects, if the increased size would increase the object size
     * above the next power of two: caches with object sizes just above a
     * power of two have a significant amount of internal fragmentation.
     */
    if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
                        2 * sizeof(unsigned long long)))
        flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
    if (!(flags & SLAB_DESTROY_BY_RCU))
        flags |= SLAB_POISON;
#endif
    if (flags & SLAB_DESTROY_BY_RCU)
        BUG_ON(flags & SLAB_POISON);
#endif
    /*
     * Always checks flags, a caller might be expecting debug support which
     * isn't available.
     */
    BUG_ON(flags & ~CREATE_MASK);

    /*
     * Check that size is in terms of words.  This is needed to avoid
     * unaligned accesses for some archs when redzoning is used, and makes
     * sure any on-slab bufctl's are also correctly aligned.
     * The two statements below round size up to a multiple of
     * BYTES_PER_WORD.
     */
    if (size & (BYTES_PER_WORD - 1)) {
        size += (BYTES_PER_WORD - 1);
        size &= ~(BYTES_PER_WORD - 1);
    }

    /* calculate the final buffer alignment: */

    /* 1) arch recommendation: can be overridden for debug */
    if (flags & SLAB_HWCACHE_ALIGN) {
        /*
         * Default alignment: as specified by the arch code.  Except if
         * an object is really small, then squeeze multiple objects into
         * one cacheline.
         */
        ralign = cache_line_size();
        while (size <= ralign / 2)
            ralign /= 2;
    } else {
        ralign = BYTES_PER_WORD;
    }

    /*
     * Redzoning and user store require word alignment or possibly larger.
     * Note this will be overridden by architecture or caller mandated
     * alignment if either is greater than BYTES_PER_WORD.
     */
    if (flags & SLAB_STORE_USER)
        ralign = BYTES_PER_WORD;

    if (flags & SLAB_RED_ZONE) {
        ralign = REDZONE_ALIGN;
        /* If redzoning, ensure that the second redzone is suitably
         * aligned, by adjusting the object size accordingly. */
        size += REDZONE_ALIGN - 1;
        size &= ~(REDZONE_ALIGN - 1);
    }

    /* 2) arch mandated alignment */
    if (ralign < ARCH_SLAB_MINALIGN) {
        ralign = ARCH_SLAB_MINALIGN;
    }
    /* 3) caller mandated alignment */
    if (ralign < align) {
        ralign = align;
    }
    /* disable debug if necessary */
    if (ralign > __alignof__(unsigned long long))
        flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
    /*
     * 4) Store it.
     */
    align = ralign;

    /* Get cache's description obj. */
    cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
    if (!cachep)
        goto oops;

#if DEBUG
    /* Remember the object size before any debug padding is added. */
    cachep->obj_size = size;

    /*
     * Both debugging options require word-alignment which is calculated
     * into align above.
     */
    if (flags & SLAB_RED_ZONE) {
        /* add space for red zone words */
        cachep->obj_offset += sizeof(unsigned long long);
        size += 2 * sizeof(unsigned long long);
    }
    if (flags & SLAB_STORE_USER) {
        /* user store requires one word storage behind the end of
         * the real object. But if the second red zone needs to be
         * aligned to 64 bits, we must allow that much space.
         */
        if (flags & SLAB_RED_ZONE)
            size += REDZONE_ALIGN;
        else
            size += BYTES_PER_WORD;
    }
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
    /* Pad the object out to a whole page, placing it at the page's end. */
    if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
        && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) {
        cachep->obj_offset += PAGE_SIZE - size;
        size = PAGE_SIZE;
    }
#endif
#endif

    /*
     * Determine if the slab management is 'on' or 'off' slab.
     * (bootstrapping cannot cope with offslab caches so don't do
     * it too early on.)
     */
    if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init)
        /*
         * Size is large, assume best to place the slab management obj
         * off-slab (should allow better packing of objs).
         */
        flags |= CFLGS_OFF_SLAB;

    size = ALIGN(size, align);

    left_over = calculate_slab_order(cachep, size, align, flags);

    /* A zero object count after calculate_slab_order() is a failure. */
    if (!cachep->num) {
        printk(KERN_ERR
               "kmem_cache_create: couldn't create cache %s.\n", name);
        kmem_cache_free(&cache_cache, cachep);
        cachep = NULL;
        goto oops;
    }
    /* Size of the slab descriptor plus one bufctl entry per object. */
    slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
              + sizeof(struct slab), align);

    /*
     * If the slab has been placed off-slab, and we have enough space then
     * move it on-slab. This is at the expense of any extra colouring.
     */
    if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
        flags &= ~CFLGS_OFF_SLAB;
        left_over -= slab_size;
    }

    if (flags & CFLGS_OFF_SLAB) {
        /* really off slab. No need for manual alignment */
        slab_size =
            cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
    }

    cachep->colour_off = cache_line_size();
    /* Offset must be a multiple of the alignment. */
    if (cachep->colour_off < align)
        cachep->colour_off = align;
    /* Number of distinct colour offsets that fit in the leftover space. */
    cachep->colour = left_over / cachep->colour_off;
    cachep->slab_size = slab_size;
    cachep->flags = flags;
    cachep->gfpflags = 0;
    if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
        cachep->gfpflags |= GFP_DMA;
    cachep->buffer_size = size;
    cachep->reciprocal_buffer_size = reciprocal_value(size);

    if (flags & CFLGS_OFF_SLAB) {
        /* Pick the general cache the off-slab descriptor is taken from. */
        cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
        /*
         * This is a possibility for one of the malloc_sizes caches.
         * But since we go off slab only for object size greater than
         * PAGE_SIZE/8, and malloc_sizes gets created in ascending order,
         * this should not happen at all.
         * But leave a BUG_ON for some lucky dude.
         */
        BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
    }
    cachep->ctor = ctor;
    /* The name pointer is stored as-is; the string is not copied. */
    cachep->name = name;

    if (setup_cpu_cache(cachep)) {
        __kmem_cache_destroy(cachep);
        cachep = NULL;
        goto oops;
    }

    /* cache setup completed, link it into the list */
    list_add(&cachep->next, &cache_chain);
    /*
     * Common exit, also reached by falling through on success.
     * cachep is NULL on every failure path that jumps here.
     */
oops:
    if (!cachep && (flags & SLAB_PANIC))
        panic("kmem_cache_create(): failed to create slab `%s'\n",
              name);
    mutex_unlock(&cache_chain_mutex);
    return cachep;
}
EXPORT_SYMBOL(kmem_cache_create);

/*
 * kmem_cache_zalloc - allocate an object from cache @k with __GFP_ZERO
 * added to the caller's @flags.
 *
 * Returns whatever kmem_cache_alloc() returns for the combined flags.
 */
static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
{
    gfp_t zeroing_flags = flags | __GFP_ZERO;

    return kmem_cache_alloc(k, zeroing_flags);
}

/*
 * kmem_cache_alloc - allocate one object from @cachep.
 *
 * Hands the request to __cache_alloc(), passing this function's own return
 * address as the "caller" argument.
 */
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
    void *caller = __builtin_return_address(0);

    return __cache_alloc(cachep, flags, caller);
}

/*
 * __cache_alloc - common allocation path behind kmem_cache_alloc().
 * @cachep: cache to allocate from
 * @flags:  allocation flags; __GFP_ZERO requests a zeroed object
 * @caller: address forwarded to cache_alloc_debugcheck_after()
 *
 * Returns the object pointer, or NULL when should_failslab() asks for a
 * failure or when the underlying allocation yields NULL.
 */
static __always_inline void *__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
{
    unsigned long irq_state;
    void *obj;

    if (should_failslab(cachep, flags))
        return NULL;

    cache_alloc_debugcheck_before(cachep, flags);

    /* The actual allocation runs with local interrupts disabled. */
    local_irq_save(irq_state);
    obj = __do_cache_alloc(cachep, flags);
    local_irq_restore(irq_state);

    obj = cache_alloc_debugcheck_after(cachep, flags, obj, caller);
    prefetchw(obj);

    /* Zero only obj_size(cachep) bytes, and only if we got an object. */
    if (unlikely(obj && (flags & __GFP_ZERO)))
        memset(obj, 0, obj_size(cachep));

    return obj;
}

/*
 * __do_cache_alloc - allocate one object from @cache.
 *
 * Thin wrapper around ____cache_alloc().  The result must be returned
 * explicitly: __cache_alloc() uses the value, and falling off the end of a
 * non-void function whose value is used is undefined behaviour.
 *
 * Returns the pointer produced by ____cache_alloc() (may be NULL).
 */
static __always_inline void * __do_cache_alloc(struct kmem_cache *cache,gfp_t flags)
{
    return ____cache_alloc(cache, flags);
}

/*
 * ____cache_alloc - take one object from the local array cache of @cachep.
 *
 * Fast path: the array returned by cpu_cache_get() still holds objects, so
 * pop the last stored pointer and mark the array as touched.
 * Slow path: the array is empty, so the request is handed to
 * cache_alloc_refill(), whose result is returned unchanged.
 */
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
    struct array_cache *local;

    check_irq_off();
    local = cpu_cache_get(cachep);

    if (likely(local->avail)) {
        STATS_INC_ALLOCHIT(cachep);
        local->touched = 1;
        local->avail--;
        return local->entry[local->avail];
    }

    /* No object available in the local cache: refill it. */
    STATS_INC_ALLOCMISS(cachep);
    return cache_alloc_refill(cachep, flags);
}

cache_alloc_refill:本地高速缓存为空时,kmem_cache_alloc(上面kmem_cache_create通过kmem_cache_zalloc调用)最终执行的重新填充函数

/*
 * cache_alloc_refill - refill the local array cache of @cachep and return
 * one object from it.
 * @cachep: cache whose local array (from cpu_cache_get()) is empty
 * @flags:  allocation flags, passed on to cache_grow() with GFP_THISNODE
 *
 * Objects are taken, in order of preference, from the node's shared array,
 * from slabs on the partial list, then from slabs on the free list.  If the
 * local array is still empty afterwards, cache_grow() is asked for a new
 * slab and the whole procedure is retried.
 *
 * Returns an object pointer, or NULL when cache_grow() returned zero and
 * the local array is still empty.
 */
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
    int batchcount;
    struct kmem_list3 *l3;
    struct array_cache *ac;
    int node;

    node = numa_node_id();

    check_irq_off();
    ac = cpu_cache_get(cachep);
retry:
    /*
     * batchcount is the chunk size used when the local cache is refilled
     * or emptied.
     */
    batchcount = ac->batchcount;
    if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
        /*
         * If there was little recent activity on this cache, then
         * perform only a partial refill.  Otherwise we could generate
         * refill bouncing.
         */
        batchcount = BATCHREFILL_LIMIT;
    }
    /* Fetch the slab lists of the current node. */
    l3 = cachep->nodelists[node];

    BUG_ON(ac->avail > 0 || !l3);
    spin_lock(&l3->list_lock);

    /* See if we can refill from the shared array */
    if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
        goto alloc_done;

    while (batchcount > 0) {
        struct list_head *entry;
        struct slab *slabp;
        /* Get slab alloc is to come from. */
        entry = l3->slabs_partial.next;
        /*
         * Partial list empty: fall back to the free list.  If that is
         * empty too, leave the loop so the cache can be grown.
         */
        if (entry == &l3->slabs_partial) {
            l3->free_touched = 1;
            entry = l3->slabs_free.next;
            if (entry == &l3->slabs_free)
                goto must_grow;
        }

        slabp = list_entry(entry, struct slab, list);
        check_slabp(cachep, slabp);
        check_spinlock_acquired(cachep);

        /*
         * The slab was either on partial or free list so
         * there must be at least one object available for
         * allocation.
         */
        BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num);

        /*
         * While this slab still has unused objects, move them into the
         * local array, up to batchcount objects in total.
         */
        while (slabp->inuse < cachep->num && batchcount--) {
            STATS_INC_ALLOCED(cachep);
            STATS_INC_ACTIVE(cachep);
            STATS_SET_HIGH(cachep);

            ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
                                node);
        }
        check_slabp(cachep, slabp);

        /* move slabp to correct slabp list: */
        list_del(&slabp->list);
        if (slabp->free == BUFCTL_END)
            list_add(&slabp->list, &l3->slabs_full);
        else
            list_add(&slabp->list, &l3->slabs_partial);
    }

must_grow:
    /* Account for the objects that were moved into the local array. */
    l3->free_objects -= ac->avail;
alloc_done:
    spin_unlock(&l3->list_lock);

    if (unlikely(!ac->avail)) {
        int x;
        /* Use cache_grow() to allocate a new slab for the cache. */
        x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);

        /* cache_grow can reenable interrupts, then ac could change. */
        ac = cpu_cache_get(cachep);
        if (!x && ac->avail == 0)    /* no objects in sight? abort */
            return NULL;

        if (!ac->avail)        /* objects refilled by interrupt? */
            goto retry;
    }
    ac->touched = 1;
    return ac->entry[--ac->avail];
}
(待续)
阅读(9140) | 评论(2) | 转发(0) |
0

上一篇:简单磁盘管理

下一篇:C++中的指针和const

给主人留下些什么吧!~~

QUNZI裙子qunziqunzi2015-11-21 15:21:41

渣渣

梦中的人在做梦2015-08-03 19:35:04

没技术含量的文章为什么叫解析?