Chinaunix首页 | 论坛 | 博客
  • 博客访问: 2295931
  • 博文数量: 218
  • 博客积分: 5767
  • 博客等级: 大校
  • 技术积分: 5883
  • 用 户 组: 普通用户
  • 注册时间: 2008-03-01 14:44
文章存档

2012年(53)

2011年(131)

2009年(1)

2008年(33)

分类: LINUX

2011-09-14 11:41:26

linux版本:2.6.36
相关数据结构
arch/arm/include/asm/setup.h
/*
 * NR_BANKS is the number of entries in the bank[] array of struct meminfo,
 * i.e. the maximum number of memory banks that can be recorded.
 * It is 16 when CONFIG_ARCH_LH7A40X is defined and 8 otherwise.
 */
#ifdef CONFIG_ARCH_LH7A40X
# define NR_BANKS 16
#else
# define NR_BANKS 8
#endif

/*
 * One bank of RAM as recorded by arm_add_memory().
 */
struct membank {
    unsigned long start;    /* start address; arm_add_memory() stores it page-aligned */
    unsigned long size;     /* length in bytes; arm_add_memory() stores a multiple of the page size */
    unsigned int highmem;   /* not written by arm_add_memory(); presumably flags a highmem bank - TODO confirm */
};

/*
 * Table of all RAM banks known to the kernel.
 */
struct meminfo {
    int nr_banks;                   /* number of valid entries at the front of bank[] */
    struct membank bank[NR_BANKS];  /* bank descriptors; at most NR_BANKS of them */
};
linux内核的内存管理分三个阶段。
1. 启动---->bootmem初始化完成为第一阶段。此阶段只能使用memblock_reserve函数分配内存。
   此阶段结束标志为:init_bootmem_done = 1.
2. bootmem初始化完--->buddy完成前。结束标志为mem_init_done = 1.
3. 全部内存初始化完毕,可以用cache和buddy分配内存。

内存初始化步骤:
1. start_kernel---->setup_arch->paging_init
                |-->setup_per_cpu_areas
                |-->build_all_zonelists
                |-->mem_init
                 -->setup_per_cpu_pageset
setup_arch-->arm_memblock_init-->memblock_init

内存原始数据由u-boot传入,对照本开发板uboot部分内存初始化函数,我们知道uboot传递过来的tag->u.mem.start, tag->u.mem.size分别为0x30000000,0x4000000,即内存起始地址是0x30000000,大小为64M。setup_arch获取u-boot传递的参数地址后,调用了parse_tag_mem32函数对传递的内存参数处理:
556 static int __init parse_tag_mem32(const struct tag *tag)
557 {
558         return arm_add_memory(tag->u.mem.start, tag->u.mem.size);
559 }
560
561 __tagtable(ATAG_MEM, parse_tag_mem32);
如上可见,parse_tag_mem32函数调用arm_add_memory函数,把RAM的start和size等参数保存到了全局变量meminfo(struct meminfo类型)的bank数组中。现在再来分析arm_add_memory。
arm_add_memory定义如下(arch/arm/kernel/setup.c)
/*
 * Record one bank of RAM in the global meminfo table.
 *
 * @start: start address of the bank as reported by the boot loader.
 * @size:  length of the bank in bytes.
 *
 * The stored bank is trimmed to whole pages that lie entirely inside
 * [start, start + size): the start is rounded up to a page boundary and
 * the remaining length is rounded down to a multiple of the page size.
 *
 * Returns 0 on success, or -EINVAL when the table is already full or when
 * the trimmed bank would not contain a single whole page.  On failure the
 * meminfo table is left untouched.
 */
static int __init arm_add_memory(unsigned long start, unsigned long size)
{
    struct membank *bank;
    unsigned long aligned_start;
    unsigned long skipped;

    if (meminfo.nr_banks >= NR_BANKS) {
        printk(KERN_CRIT "NR_BANKS too low, "
            "ignoring memory at %#lx\n", start);
        return -EINVAL;
    }

    /*
     * Rounding the start up discards (aligned_start - start) bytes from
     * the front of the region, so that is the amount to take off the
     * size.  Subtracting the offset into the page instead would let the
     * bank run past the real end of the region.
     */
    aligned_start = PAGE_ALIGN(start);
    if (aligned_start < start)
        return -EINVAL;     /* PAGE_ALIGN() wrapped around the address space */

    skipped = aligned_start - start;
    if (size <= skipped)
        return -EINVAL;     /* nothing left after aligning the start */

    /* Round the remaining length down to whole pages. */
    size = (size - skipped) & PAGE_MASK;
    if (size == 0)
        return -EINVAL;

    /* Only touch the table once the bank is known to be usable. */
    bank = &meminfo.bank[meminfo.nr_banks];
    bank->start = aligned_start;
    bank->size  = size;

    meminfo.nr_banks++;
    return 0;
}
经过这样的处理,setup.c文件中的meminfo可就不再是
struct meminfo meminfo  = { 0, };
而是
struct meminfo meminfo  = { 1, { {0x30000000, 0x4000000, 0}, }, };
表示当前有一个内存区域(bank),物理地址是从0x30000000开始,大小是64M,highmem标志是0
处理完这些数据后,setup_arch会在第822行调用arm_memblock_init(&meminfo, mdesc);
先分析第1阶段。
1. 初始化:setup_arch-->arm_memblock_init-->memblock_init
arch/arm/mm/init.c
270 void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
271 {
272         int i;
273
274         memblock_init();
275         for (i = 0; i < mi->nr_banks; i++)
276                 memblock_add(mi->bank[i].start, mi->bank[i].size);
277
278         /* Register the kernel text, kernel data and initrd with memblock. */
279 #ifdef CONFIG_XIP_KERNEL //配置内核XIP方式运行,避免把内核从Nor 拷贝到主存 空间,让内核运行在低功耗的NOR 上,节省系统启动时间,降低系统对SDRAM 的需要,减少电能消耗,使产品能够持续使用更长时间
280         memblock_reserve(__pa(_data), _end - _data);
281 #else
282         memblock_reserve(__pa(_stext), _end - _stext);
/*第282行,在0节点中保留内核镜像部分的内存,大概是0x30108000到以后的5M左右(解压后的内核)。
参看arch/arm/kernel/vmlinux.lds文件,
375 SECTIONS
376 {
377  . = 0xC0000000 + 0x00108000;
378  .init : { /* Init code and data                */
379   _stext = .;
所以stext等于c0108000,对应物理地址30108000,
对_end再参看arch/arm/kernel/vmlinux.lds.S文件,它也在SECTIONS区,在233行定义
232         BSS_SECTION(0, 0, 0)
233         _end = .;
对于我移植的2.6.36内核,_end等于c0555b60,对应物理地址30555b60,在此语句后添加打印信息得到
_end - _stext=44db60,即4512608字节(约4.3MB),所以这一句功能就是把0x30108000 ~ 0x30555b60这段(约4.3MB)空间保留下来。
*/
283 #endif
284 #ifdef CONFIG_BLK_DEV_INITRD //本开发板没配置
285         if (phys_initrd_size) {
286                 memblock_reserve(phys_initrd_start, phys_initrd_size);
287
288                 /* Now convert initrd to virtual addresses */
289                 initrd_start = __phys_to_virt(phys_initrd_start);
290                 initrd_end = initrd_start + phys_initrd_size;
291         }
292 #endif
293
294         arm_mm_memblock_reserve();//就是把0x30104000 ~ 0x30108000这段(16K)用于页目录的内容保留下来。
295
296         /* reserve any platform specific memblock areas */
297         if (mdesc->reserve)//对st2410中没有定义
298                 mdesc->reserve();
299
300         memblock_analyze();//计算内存块大小
301         memblock_dump_all();//显示
302 }
 内存原始数据由u-boot传入,在初始化完memblock_init后,arm_memblock_init函数中memblock_add调用 memblock_add_region加入原始内存数据,我的板子上配了64M内存,即:0x30000000->0x33ffffff,加完后的配置如下:
MEMBLOCK configuration:                                                        
 rmo_size    = 0x0                                                             
 memory.size = 0x4000000                                                       
 memory.cnt  = 0x1                                                             
 memory[0x0]    0x0000000030000000 - 0x0000000033ffffff, 0x4000000 bytes       
 reserved.cnt  = 0x1                                                           
 reserved[0x0]  0x0000000030104000 - 0x0000000030555b5f, 0x451b60 bytes 

memblock_init()在mm/memblock.c里面被定义。
/*
 * Reset the global memblock tables to their initial state.
 *
 * Both the "memory" and the "reserved" table start out holding exactly one
 * zero-sized placeholder entry at base 0.  memblock_add_region() recognises
 * that placeholder and overwrites it with the first real region.
 */
void __init memblock_init(void)
{
    struct memblock_region *avail = &memblock.memory;
    struct memblock_region *resv = &memblock.reserved;

    /* Dummy zero-size entry for the available-memory table. */
    avail->cnt = 1;
    avail->region[0].base = 0;
    avail->region[0].size = 0;

    /* Same placeholder for the reserved table. */
    resv->cnt = 1;
    resv->region[0].base = 0;
    resv->region[0].size = 0;
}
其作用就是初始化memblock这个结构。将他们清空。memblock包含两个重要的成员,分别是memblock.memory和memblock.reserved.其分别代表系统中可用的内存和已经被保留的内存。
memblock.memory和memblock.reserved被定义为以下结构:include/linux/memblock.h
#define MAX_MEMBLOCK_REGIONS 128

/*
 * One contiguous range of addresses tracked by memblock.
 */
struct memblock_property {
    u64 base;   /* first address of the range */
    u64 size;   /* length of the range in bytes; the range ends at base + size - 1 */
};

/*
 * A table of ranges (either the "memory" or the "reserved" table).
 * memblock_add_region() keeps new entries ordered by ascending base.
 */
struct memblock_region {
    unsigned long cnt;  /* number of entries of region[] in use */
    u64 size;           /* for the memory table, memblock_analyze() stores the sum of all entry sizes here */
    struct memblock_property region[MAX_MEMBLOCK_REGIONS+1];    /* the entries; one slot more than MAX_MEMBLOCK_REGIONS */
};

/*
 * Global memblock state: all known memory and everything reserved out of it.
 */
struct memblock {
    unsigned long debug;                /* not referenced by the code shown here */
    u64 rmo_size;                       /* size of the region added at base 0; set by memblock_add() */
    struct memblock_region memory;      /* ranges of available memory */
    struct memblock_region reserved;    /* ranges that have been reserved */
};


mm/memblock.c
/*
 * Add the range [base, base + size) to the table of available memory.
 *
 * Returns whatever memblock_add_region() returns for the memory table.
 */
long memblock_add(u64 base, u64 size)
{
    /*
     * On pSeries LPAR systems the first MEMBLOCK is the RMO region, so a
     * range that starts at address 0 also defines rmo_size.
     */
    if (!base)
        memblock.rmo_size = size;

    return memblock_add_region(&memblock.memory, base, size);
}

/*
 * Insert the range [base, base + size) into the table @rgn.
 *
 * Returns 0 when the range was stored in the empty placeholder entry, was
 * already present with the same base and size, or was inserted as a new
 * entry; a positive number of merges when it was coalesced with existing
 * entries; and -1 when the table already holds MAX_MEMBLOCK_REGIONS entries.
 */
static long memblock_add_region(struct memblock_region *rgn, u64 base, u64 size)
{
    unsigned long coalesced = 0;
    long adjacent, i;

    /* Table only holds the zero-sized placeholder: overwrite it in place. */
    if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) {
        rgn->region[0].base = base;
        rgn->region[0].size = size;
        return 0;
    }

    /* First try and coalesce this MEMBLOCK with another. */
    for (i = 0; i < rgn->cnt; i++) {
        u64 rgnbase = rgn->region[i].base;
        u64 rgnsize = rgn->region[i].size;

        if ((rgnbase == base) && (rgnsize == size))
            /* Already have this region, so we're done */
            return 0;

        /*
         * NOTE(review): memblock_addrs_adjacent() is not shown here; from
         * the handling below a positive result presumably means the new
         * range sits directly below entry i and a negative one directly
         * above it - confirm against the helper.
         */
        adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize);
        if (adjacent > 0) {
            /* Grow entry i downwards by size bytes. */
            rgn->region[i].base -= size;
            rgn->region[i].size += size;
            coalesced++;
            break;
        } else if (adjacent < 0) {
            /* Grow entry i upwards by size bytes. */
            rgn->region[i].size += size;
            coalesced++;
            break;
        }
    }

    /*
     * If the loop stopped at entry i and that entry now touches entry i+1,
     * merge the two.  When the loop ran to completion i equals rgn->cnt and
     * this test is false.
     */
    if ((i < rgn->cnt - 1) && memblock_regions_adjacent(rgn, i, i+1)) {
        memblock_coalesce_regions(rgn, i, i+1);
        coalesced++;
    }

    if (coalesced)
        return coalesced;
    /* No room for another entry. */
    if (rgn->cnt >= MAX_MEMBLOCK_REGIONS)
        return -1;

    /* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */
    /*
     * Walk from the last entry downwards, moving every entry with a larger
     * base one slot up, and drop the new range in right after the first
     * entry whose base is not larger.
     */
    for (i = rgn->cnt - 1; i >= 0; i--) {
        if (base < rgn->region[i].base) {
            rgn->region[i+1].base = rgn->region[i].base;
            rgn->region[i+1].size = rgn->region[i].size;
        } else {
            rgn->region[i+1].base = base;
            rgn->region[i+1].size = size;
            break;
        }
    }

    /*
     * The new range is lower than every existing entry: everything has been
     * shifted up by one, so it goes into slot 0.
     */
    if (base < rgn->region[0].base) {
        rgn->region[0].base = base;
        rgn->region[0].size = size;
    }
    rgn->cnt++;

    return 0;
}
memblock_add_region函数作用是将给定的物理地址所指定的memory region加入到指定的memblock_region(memblock.reserved或者是memblock.memory)中。新加入的memory region需要经过检查:如果与已有的某个region完全相同则直接返回;如果与已有的region首尾相邻(adjacent),则合并到该region中;否则就按基地址从小到大的顺序插入一个新的region。
3. memblock_reserve用来分配内存页。
以分配内核本身占用的内存为例:
/* Register the kernel text, kernel data and initrd with memblock. */
memblock_reserve(__pa(_stext), _end - _stext);
_stext,_end参见arch/arm/kernel/vmlinux.lds.S 链接脚本
/*
 * Mark the range [base, base + size) as reserved.
 *
 * A zero size is treated as a bug (BUG_ON).  Returns whatever
 * memblock_add_region() returns for the reserved table.
 */
long __init memblock_reserve(u64 base, u64 size)
{
    BUG_ON(size == 0);

    return memblock_add_region(&memblock.reserved, base, size);
}
分配完之后的内存配置为:
MEMBLOCK configuration:
 rmo_size    = 0x40000000
 memory.size = 0x0
 memory.cnt  = 0x1
 memory[0x0]    0x0000000000000000 - 0x000000003fffffff, 0x40000000 bytes
 reserved.cnt  = 0x1
 reserved[0x0]  0x0000000000000000 - 0x0000000000xxxxx,  _end - _stext bytes
如果分配有连续则进行合并。
几次分配后的配置如下:
MEMBLOCK configuration:
 rmo_size    = 0x80000000
 memory.size = 0x80000000
 memory.cnt  = 0x1
 memory[0x0]    0x0000000000000000 - 0x000000007fffffff, 0x80000000 bytes
 reserved.cnt  = 0x6
 reserved[0x0]  0x0000000000000000 - 0x00000000006b0fff, 0x6b1000 bytes
 reserved[0x1]  0x0000000000ffa000 - 0x0000000000ffcfff, 0x3000 bytes
 reserved[0x2]  0x000000002fbc4000 - 0x000000002fbdefff, 0x1b000 bytes
 reserved[0x3]  0x000000002fbdfa88 - 0x000000002ffff4cc, 0x41fa45 bytes
 reserved[0x4]  0x000000002fbe4000 - 0x000000002ffff4cd, 0x41b4ce bytes
 reserved[0x5]  0x000000007ffff000 - 0x000000007fffffff, 0x1000 bytes
init

arch/arm/mm/mmu.c
/*
 * Reserve the special regions of memory
 */
void __init arm_mm_memblock_reserve(void)
{
    /* Physical address of the initial page directory. */
    unsigned long pgd_phys = __pa(swapper_pg_dir);

    /*
     * Reserve the page tables.  These are already in use,
     * and can only be in node 0.
     */
    memblock_reserve(pgd_phys, PTRS_PER_PGD * sizeof(pgd_t));
    /*
     * Background, for the board discussed in this article:
     *
     * swapper_pg_dir is the virtual address of the initial page table.
     * It is defined in ./arch/arm/kernel/head.S:
     *   44:    .globl  swapper_pg_dir
     *   45:    .equ    swapper_pg_dir, KERNEL_RAM_VADDR - 0x4000
     *   ./arch/arm/kernel/head.S:55:#define KERNEL_START        KERNEL_RAM_VADDR
     * so swapper_pg_dir is 0xc0104000, i.e. physical address 0x30104000.
     *
     * PTRS_PER_PGD is defined at line 103 of arch/arm/include/asm/pgtable.h:
     *   #define PTRS_PER_PGD        2048
     * and pgd_t is defined as
     *   typedef unsigned long pgd_t[2];
     * so PTRS_PER_PGD * sizeof(pgd_t) = 2048 * 8 = 16384 = 0x00004000 (16K).
     *
     * In other words, the 16K at 0x30104000 ~ 0x30108000 that holds the
     * page directory is what gets reserved here.
     */
#ifdef CONFIG_SA1111 /* not defined for the 2410 */
    /*
     * Because of the SA1111 DMA bug, we want to preserve our
     * precious DMA-able memory...
     */
    memblock_reserve(PHYS_OFFSET, pgd_phys - PHYS_OFFSET);
#endif
}

arch/arm/mm/init.c
/*
 * Helpers for the "Virtual kernel memory layout" printk: each expands to
 * three comma-separated arguments - start b, end t and the size (t - b)
 * converted to the unit the format string prints.
 */
#define MLK(b, t) b, t, ((t) - (b)) >> 10 // shift right by 10: size in kB
#define MLM(b, t) b, t, ((t) - (b)) >> 20  // shift right by 20: size in MB
#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
        printk(KERN_NOTICE "Virtual kernel memory layout:\n"
                        "    vector  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
                        "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
#ifdef CONFIG_MMU
                        "    DMA     : 0x%08lx - 0x%08lx   (%4ld MB)\n"
#endif
                        "    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
                        "    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB)\n"
#ifdef CONFIG_HIGHMEM
                        "    pkmap   : 0x%08lx - 0x%08lx   (%4ld MB)\n"
#endif
                        "    modules : 0x%08lx - 0x%08lx   (%4ld MB)\n"
                        "      .init : 0x%p" " - 0x%p" "   (%4d kB)\n"
                        "      .text : 0x%p" " - 0x%p" "   (%4d kB)\n"
                        "      .data : 0x%p" " - 0x%p" "   (%4d kB)\n",

                        MLK(UL(CONFIG_VECTORS_BASE), UL(CONFIG_VECTORS_BASE) +
                                (PAGE_SIZE)),
                        MLK(FIXADDR_START, FIXADDR_TOP),
#ifdef CONFIG_MMU
                        MLM(CONSISTENT_BASE, CONSISTENT_END),
#endif
                        MLM(VMALLOC_START, VMALLOC_END),
                        MLM(PAGE_OFFSET, (unsigned long)high_memory),
#ifdef CONFIG_HIGHMEM
                        MLM(PKMAP_BASE, (PKMAP_BASE) + (LAST_PKMAP) *
                                (PAGE_SIZE)),
#endif
                        MLM(MODULES_VADDR, MODULES_END),

                        MLK_ROUNDUP(__init_begin, __init_end),
                        MLK_ROUNDUP(_text, _etext),
                        MLK_ROUNDUP(_data, _edata));

 系统启动的时候打印出:
stext:c0108000,__pa_stext:30108000, _end - _stext=44db60              
_end:c0555b60,__pa(_end)=30555b60    
Memory: 64MB = 64MB total                                             
Memory: 60536k/60536k available, 5000k reserved, 0K highmem           
Virtual kernel memory layout:                                         
    vector  : 0xffff0000 - 0xffff1000   (   4 kB)                     
    fixmap  : 0xfff00000 - 0xfffe0000   ( 896 kB)                     
    DMA     : 0xffc00000 - 0xffe00000   (   2 MB)                     
    vmalloc : 0xc4800000 - 0xe0000000   ( 440 MB)                     
    lowmem  : 0xc0000000 - 0xc4000000   (  64 MB)                     
    modules : 0xbf000000 - 0xc0000000   (  16 MB)                     
      .init : 0xc0108000 - 0xc012e000   ( 152 kB)---->(系统启动过程中 以__init宏标识的函数占用的空间 被vmlinux.lds标识为 __init_begin __init_end 启动init进程前被释放掉 :Freeing init memory: 152K)                     
      .text : 0xc012e000 - 0xc04dd000   (3772 kB)                     
      .data : 0xc04fe000 - 0xc0527500   ( 166 kB)                     
Hierarchical RCU implementation.
启动后查看内存命令显示如下:
[root@localhost /]# free                                              
              total         used         free       shared      buffers
  Mem:        60688         6644        54044            0            0
 Swap:            0            0            0                         
Total:        60688         6644        54044                         
[root@localhost /]#   

保留内存5000K中 linux内核占用了152+3772+166=4090K
加上boot的时候参数等平台空间占用32K,共4090+32=4122K
其它还差5000-4122=878K 没有着落
其它还包括影射的向量表4K
还有可能是系统管理内存的页表占用空间等其它的占用

系统启动成功后 Freeing init memory: 152K
所以用free命令看会多了152K(60536K+152K=60688K)

而free命令中看到的used基本是缓存 buffer占用的,为了提高i/o速度的缓存,很多都并非真正应用
在我应用中去试图malloc更多的内存的时候 used中很多都能被malloc出来的.

mm/memblock.c
/*
 * Recompute memblock.memory.size as the sum of the sizes of all entries
 * currently in the available-memory table.
 */
void __init memblock_analyze(void)
{
    u64 total = 0;
    int idx = 0;

    while (idx < memblock.memory.cnt) {
        total += memblock.memory.region[idx].size;
        idx++;
    }

    memblock.memory.size = total;
}
mm/memblock.c
static void memblock_dump(struct memblock_region *region, char *name)
{
    unsigned long long base, size;
    int i;

    pr_info(" %s.cnt  = 0x%lx\n", name, region->cnt);

    for (i = 0; i < region->cnt; i++) {
        base = region->region[i].base;
        size = region->region[i].size;

        pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
            name, i, base, base + size - 1, size);
    }
}
mm/memblock.c
/*
 * Print the whole memblock configuration: rmo_size, the total memory size
 * and then the "memory" and "reserved" tables.
 *
 * Does nothing unless memblock_debug is non-zero.
 */
void memblock_dump_all(void)
{
    if (memblock_debug) {
        unsigned long long rmo_bytes = memblock.rmo_size;
        unsigned long long mem_bytes = memblock.memory.size;

        pr_info("MEMBLOCK configuration:\n");
        pr_info(" rmo_size    = 0x%llx\n", rmo_bytes);
        pr_info(" memory.size = 0x%llx\n", mem_bytes);

        memblock_dump(&memblock.memory, "memory");
        memblock_dump(&memblock.reserved, "reserved");
    }
}

 22 static int memblock_debug;
 23
 24 static int __init early_memblock(char *p)
 25 {
 26         if (p && strstr(p, "debug"))
 27                 memblock_debug = 1;
 28         return 0;
 29 }
 30 early_param("memblock", early_memblock);

阅读(9608) | 评论(0) | 转发(7) |
给主人留下些什么吧!~~