Linux Kernel: 3.4.0
在Linux Kernel中,一开始内存相关的信息是由struct meminfo来保存的,每个物理连续的内存区域被保存为meminfo中的一个元素,也就是说在Linux使用中,整块物理内存可能是不连续的,可能其中某一中间区域被其他CPU使用掉了。
那么内存相关信息又是从哪里收集到的呢,系统在boot阶段,如u-boot会将当前物理内存linux可以使用的部分通过TAG的形式传递给linux 内核。Qualcomm使用的是叫lk的boot,不管用的是哪种boot类型,使用TAG来传递参数的原理是一样的。
下面我们看下Linux内核是如何收集内存信息的。
Meminfo信息收集
系统启动有如下流程:
start_kernel -> setup_arch -> setup_machine_tags-> parse_tags -> parse_tag.
-
/*
 * parse_tag() - dispatch one boot tag to its registered parser.
 * @tag: the tag handed over by the bootloader.
 *
 * Scans the table delimited by __tagtable_begin/__tagtable_end for an
 * entry whose tag id equals tag->hdr.tag and invokes that entry's
 * parse() callback (its return value is ignored).
 *
 * Returns 1 when a matching entry was found, 0 otherwise.
 */
static int __init parse_tag(const struct tag *tag)
{
	extern struct tagtable __tagtable_begin, __tagtable_end;
	struct tagtable *entry = &__tagtable_begin;

	while (entry < &__tagtable_end) {
		if (entry->tag == tag->hdr.tag) {
			entry->parse(tag);
			return 1;
		}
		entry++;
	}

	/* Walked the whole table without a match. */
	return 0;
}
-
/*
 * parse_tag() - dispatch one boot tag to its registered parser.
 * @tag: the tag handed over by the bootloader.
 *
 * Scans the table delimited by __tagtable_begin/__tagtable_end for an
 * entry whose tag id equals tag->hdr.tag and invokes that entry's
 * parse() callback (its return value is ignored).
 *
 * Returns 1 when a matching entry was found, 0 otherwise.
 */
static int __init parse_tag(const struct tag *tag)
{
	extern struct tagtable __tagtable_begin, __tagtable_end;
	struct tagtable *entry = &__tagtable_begin;

	while (entry < &__tagtable_end) {
		if (entry->tag == tag->hdr.tag) {
			entry->parse(tag);
			return 1;
		}
		entry++;
	}

	/* Walked the whole table without a match. */
	return 0;
}
__tagtable_begin被定义在kernel/arch/arm/kernel/vmlinux.lds.S中:
-
/*
 * Output section .init.tagtable: gathers every input section named
 * .taglist.init. __tagtable_begin is set to the location counter just
 * before those input sections and __tagtable_end just after them, so the
 * two symbols bracket everything placed in .taglist.init.
 */
.init.tagtable : {
-
__tagtable_begin = .;
-
*(.taglist.init)
-
__tagtable_end = .;
-
}
-
/*
 * Output section .init.tagtable: gathers every input section named
 * .taglist.init. __tagtable_begin is set to the location counter just
 * before those input sections and __tagtable_end just after them, so the
 * two symbols bracket everything placed in .taglist.init.
 */
.init.tagtable : {
-
__tagtable_begin = .;
-
*(.taglist.init)
-
__tagtable_end = .;
-
}
另外,在arch/arm/kernel/setup.c中有如下函数定义:
-
/*
 * parse_tag_mem32() - parser for ATAG_MEM tags.
 * @tag: tag whose u.mem member carries the start and size of one region.
 *
 * Forwards the region to arm_add_memory() and returns its result.
 */
static int __init parse_tag_mem32(const struct tag *tag)
{
	int ret;

	ret = arm_add_memory(tag->u.mem.start, tag->u.mem.size);
	return ret;
}

/* Register the parser above for tags whose id is ATAG_MEM. */
__tagtable(ATAG_MEM, parse_tag_mem32);
-
/*
 * parse_tag_mem32() - parser for ATAG_MEM tags.
 * @tag: tag whose u.mem member carries the start and size of one region.
 *
 * Forwards the region to arm_add_memory() and returns its result.
 */
static int __init parse_tag_mem32(const struct tag *tag)
{
	int ret;

	ret = arm_add_memory(tag->u.mem.start, tag->u.mem.size);
	return ret;
}

/* Register the parser above for tags whose id is ATAG_MEM. */
__tagtable(ATAG_MEM, parse_tag_mem32);
__tagtable是个宏定义:
-
/*
 * __tagtable(tag, fn) - register a tag parser.
 *
 * Defines a static const struct tagtable object named __tagtable_<fn>,
 * initialised with { tag, fn } and decorated with __tag.
 *
 * Fix: the type name and the variable name were fused together
 * ("struct tagtable__tagtable_##fn"); they must be separate tokens.
 */
#define __tagtable(tag, fn) \
static const struct tagtable __tagtable_##fn __tag = { tag, fn }
-
/*
 * __tagtable(tag, fn) - register a tag parser.
 *
 * Defines a static const struct tagtable object named __tagtable_<fn>,
 * initialised with { tag, fn } and decorated with __tag.
 *
 * Fix: the type name and the variable name were fused together
 * ("struct tagtable__tagtable_##fn"); they must be separate tokens.
 */
#define __tagtable(tag, fn) \
static const struct tagtable __tagtable_##fn __tag = { tag, fn }
里面的__tag的宏定义又如下:
-
/*
 * __tag - decoration for tag-table entries: applies __used and asks the
 * compiler to place the object in the ".taglist.init" section.
 *
 * Fix: "__used" and "__attribute__" were fused into a single identifier;
 * they must be two separate tokens.
 */
#define __tag __used __attribute__((__section__(".taglist.init")))
-
/*
 * __tag - decoration for tag-table entries: applies __used and asks the
 * compiler to place the object in the ".taglist.init" section.
 *
 * Fix: "__used" and "__attribute__" were fused into a single identifier;
 * they must be two separate tokens.
 */
#define __tag __used __attribute__((__section__(".taglist.init")))
__attribute__是一个特殊的GNU关键字,在这里的用法是:告诉编译器需要将其作用的函数或者数据放入”.taglist.init”这一段区域。
也就是说由__tagtable定义的struct tagtable表项(其中保存了tag值和对应的解析函数指针)将会被放在section“.taglist.init” 这个区域,而且__tagtable_begin指向的就是这个区域的首地址。所以在parse_tag()做for循环遍历的时候,
只要boot传过来的tag类型为ATAG_MEM,就会调用到parse_tag_mem32()。
其中一点要注意的是,parse_tag_mem32()的TAG为ATAG_MEM, 所以在boot传过来的TAG参数如果是要定义为memory参数的话TAG一定要定义为ATAG_MEM,否则parse_tag_mem32()是无法解析到的!
parse_tag_mem32()调用arm_add_memory().
/*start和size参数是从boot传过来的。*/
-
int __init arm_add_memory(phys_addr_t start, unsigned long size)
-
{
-
/*第一次进来meminfo.nr_banks值为0.*/
-
struct membank *bank = &meminfo.bank[meminfo.nr_banks];
-
/*最多能保存NR_BANKS个bank,本平台为8.*/
-
if (meminfo.nr_banks >= NR_BANKS) {
-
printk(KERN_CRIT "NR_BANKS too low, "
-
"ignoring memory at 0x%08llx\n", (long long)start);
-
return -EINVAL;
-
}
-
/*页对齐后保存物理起始地址。*/
-
size -= start & ~PAGE_MASK;
-
bank->start = PAGE_ALIGN(start);
-
/*保存本bank size.*/
-
bank->size = size & PAGE_MASK;
-
-
/*
-
* Check whether this memory region has non-zero size or
-
* invalid node number.
-
*/
-
if (bank->size == 0)
-
return -EINVAL;
-
/*记录当前拥有bank数量。*/
-
meminfo.nr_banks++;
-
return 0;
-
}
-
/*
 * arm_add_memory() - record one physical memory region as a meminfo bank.
 * @start: physical start address of the region (handed over by the boot
 *         loader).
 * @size:  size of the region in bytes (handed over by the boot loader).
 *
 * The region is shrunk to whole pages: the start is rounded up to the
 * next page boundary and the remaining size is rounded down.
 *
 * Returns 0 when the bank was stored, -EINVAL when the bank table is
 * already full or when no whole page is left after alignment.
 */
int __init arm_add_memory(phys_addr_t start, unsigned long size)
{
	struct membank *bank;
	phys_addr_t aligned_start;

	/*
	 * At most NR_BANKS banks can be stored (8 on the platform discussed
	 * in this article, according to its author).
	 */
	if (meminfo.nr_banks >= NR_BANKS) {
		printk(KERN_CRIT "NR_BANKS too low, "
			"ignoring memory at 0x%08llx\n", (long long)start);
		return -EINVAL;
	}

	/* Next free slot in the bank table. */
	bank = &meminfo.bank[meminfo.nr_banks];

	/*
	 * Round the start up and give up exactly the bytes that were skipped.
	 * The previous code subtracted (start & ~PAGE_MASK), i.e. the
	 * round-DOWN distance, which let the bank extend past the real end
	 * of the region and made size wrap around when it was smaller than
	 * the in-page offset.
	 */
	aligned_start = PAGE_ALIGN(start);
	if (aligned_start < start || aligned_start - start >= size)
		size = 0;	/* alignment wrapped, or nothing left */
	else
		size -= aligned_start - start;

	bank->start = aligned_start;

	/* Keep whole pages only. */
	bank->size = size & PAGE_MASK;

	/* Reject regions that do not contain at least one whole page. */
	if (bank->size == 0)
		return -EINVAL;

	/* The slot is now in use. */
	meminfo.nr_banks++;
	return 0;
}
Meminfo检查
在meminfo信息收集完成之后,系统会先对它作一个检查:
start_kernel -> setup_arch -> sanity_check_meminfo.
-
/*
 * sanity_check_meminfo() - validate the collected meminfo banks.
 *
 * Walks every bank and compares its virtual address range with
 * PAGE_OFFSET and vmalloc_min. With CONFIG_HIGHMEM a bank is flagged as
 * highmem, or split into a lowmem and a highmem bank when it crosses
 * vmalloc_min; without CONFIG_HIGHMEM such banks are dropped or
 * truncated. Afterwards meminfo.nr_banks, arm_lowmem_limit, high_memory
 * and the memblock current limit are updated.
 *
 * "~~snip" below marks code that the quoting article left out.
 */
void __init sanity_check_meminfo(void)
-
{
-
int i, j, highmem = 0;
-
~~snip
-
/* Check every bank. i is the read index and j the write index, so a bank
   that is skipped with "continue" (no j++) is overwritten by the next one. */
-
for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
-
struct membank *bank = &meminfo.bank[j];
-
*bank = meminfo.bank[i];
-
/* NOTE(review): presumably the PAE/LPAE case, where a bank can start above ULONG_MAX - confirm. */
-
if (bank->start > ULONG_MAX)
-
highmem = 1;
-
-
#ifdef CONFIG_HIGHMEM
-
/* If the bank's virtual start address is at or above vmalloc_min, or is
-
below PAGE_OFFSET (i.e. outside the kernel's linear "lowmem" mapping),
-
the bank is treated as high memory. vmalloc_min is the lowest address of
-
the vmalloc area. highmem is never reset inside the loop, so once it is
-
set every later bank is flagged as highmem too. */
-
if (__va(bank->start) >= vmalloc_min ||
-
__va(bank->start) < (void *)PAGE_OFFSET)
-
highmem = 1;
-
-
bank->highmem = highmem;
-
-
/*
-
* Split those memory banks which are partially overlapping
-
* the vmalloc area greatly simplifying things later.
-
*/
-
/* The bank starts in lowmem but extends past vmalloc_min, i.e. part of
-
it is lowmem and part of it is highmem: split it into two banks at
-
vmalloc_min. */
-
if (!highmem && __va(bank->start) < vmalloc_min &&
-
bank->size > vmalloc_min - __va(bank->start)) {
-
if (meminfo.nr_banks >= NR_BANKS) {
-
printk(KERN_CRIT "NR_BANKS too low, "
-
"ignoring high memory\n");
-
} else {
-
/* Move this bank and the ones after it up by one slot to make room
-
for the bank that is split off. */
-
memmove(bank + 1, bank,
-
(meminfo.nr_banks - i) * sizeof(*bank));
-
meminfo.nr_banks++;
-
i++;
-
/* bank[1] becomes the upper part: it starts at vmalloc_min and is highmem. */
-
bank[1].size -= vmalloc_min - __va(bank->start);
-
bank[1].start = __pa(vmalloc_min - 1) + 1;
-
bank[1].highmem = highmem = 1;
-
j++;
-
}
-
/* The lowmem part keeps its start; only its size is cut at vmalloc_min. */
-
bank->size = vmalloc_min - __va(bank->start);
-
}
-
#else
-
bank->highmem = highmem;
-
/* Without CONFIG_HIGHMEM a highmem bank is simply ignored. */
-
/*
-
* Highmem banks not allowed with !CONFIG_HIGHMEM.
-
*/
-
if (highmem) {
-
printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
-
"(!CONFIG_HIGHMEM).\n",
-
(unsigned long long)bank->start,
-
(unsigned long long)bank->start + bank->size - 1);
-
continue;
-
}
-
/* Does the bank start at or above vmalloc_min, or below PAGE_OFFSET? */
-
/*
-
* Check whether this memory bank would entirely overlap
-
* the vmalloc area.
-
*/
-
if (__va(bank->start) >= vmalloc_min ||
-
__va(bank->start) < (void *)PAGE_OFFSET) {
-
printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
-
"(vmalloc region overlap).\n",
-
(unsigned long long)bank->start,
-
(unsigned long long)bank->start + bank->size - 1);
-
continue;
-
}
-
/* Does the bank end above vmalloc_min (or wrap around)? Then truncate it. */
-
/*
-
* Check whether this memory bank would partially overlap
-
* the vmalloc area.
-
*/
-
if (__va(bank->start + bank->size) > vmalloc_min ||
-
__va(bank->start + bank->size) < __va(bank->start)) {
-
unsigned long newsize = vmalloc_min - __va(bank->start);
-
printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
-
"to -%.8llx (vmalloc region overlap).\n",
-
(unsigned long long)bank->start,
-
(unsigned long long)bank->start + bank->size - 1,
-
(unsigned long long)bank->start + newsize - 1);
-
bank->size = newsize;
-
}
-
#endif
-
/* Raise arm_lowmem_limit when this lowmem bank ends above the current value. */
-
if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit)
-
arm_lowmem_limit = bank->start + bank->size;
-
-
j++;
-
}
-
#ifdef CONFIG_HIGHMEM
-
if (highmem) {
-
const char *reason = NULL;
-
/* With a VIPT aliasing cache highmem is not used: the trailing highmem
-
banks are dropped below. */
-
if (cache_is_vipt_aliasing()) {
-
/*
-
* Interactions between kmap and other mappings
-
* make highmem support with aliasing VIPT caches
-
* rather difficult.
-
*/
-
reason = "with VIPT aliasing cache";
-
}
-
if (reason) {
-
printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
-
reason);
-
while (j > 0 && meminfo.bank[j - 1].highmem)
-
j--;
-
}
-
}
-
#endif
-
meminfo.nr_banks = j;
-
/* Everything above arm_lowmem_limit is treated as high memory. */
-
high_memory = __va(arm_lowmem_limit - 1) + 1;
-
memblock_set_current_limit(arm_lowmem_limit);
-
}
-
/*
 * sanity_check_meminfo() - validate the collected meminfo banks.
 *
 * Walks every bank and compares its virtual address range with
 * PAGE_OFFSET and vmalloc_min. With CONFIG_HIGHMEM a bank is flagged as
 * highmem, or split into a lowmem and a highmem bank when it crosses
 * vmalloc_min; without CONFIG_HIGHMEM such banks are dropped or
 * truncated. Afterwards meminfo.nr_banks, arm_lowmem_limit, high_memory
 * and the memblock current limit are updated.
 *
 * "~~snip" below marks code that the quoting article left out.
 */
void __init sanity_check_meminfo(void)
-
{
-
int i, j, highmem = 0;
-
~~snip
-
/* Check every bank. i is the read index and j the write index, so a bank
   that is skipped with "continue" (no j++) is overwritten by the next one. */
-
for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
-
struct membank *bank = &meminfo.bank[j];
-
*bank = meminfo.bank[i];
-
/* NOTE(review): presumably the PAE/LPAE case, where a bank can start above ULONG_MAX - confirm. */
-
if (bank->start > ULONG_MAX)
-
highmem = 1;
-
-
#ifdef CONFIG_HIGHMEM
-
/* If the bank's virtual start address is at or above vmalloc_min, or is
-
below PAGE_OFFSET (i.e. outside the kernel's linear "lowmem" mapping),
-
the bank is treated as high memory. vmalloc_min is the lowest address of
-
the vmalloc area. highmem is never reset inside the loop, so once it is
-
set every later bank is flagged as highmem too. */
-
if (__va(bank->start) >= vmalloc_min ||
-
__va(bank->start) < (void *)PAGE_OFFSET)
-
highmem = 1;
-
-
bank->highmem = highmem;
-
-
/*
-
* Split those memory banks which are partially overlapping
-
* the vmalloc area greatly simplifying things later.
-
*/
-
/* The bank starts in lowmem but extends past vmalloc_min, i.e. part of
-
it is lowmem and part of it is highmem: split it into two banks at
-
vmalloc_min. */
-
if (!highmem && __va(bank->start) < vmalloc_min &&
-
bank->size > vmalloc_min - __va(bank->start)) {
-
if (meminfo.nr_banks >= NR_BANKS) {
-
printk(KERN_CRIT "NR_BANKS too low, "
-
"ignoring high memory\n");
-
} else {
-
/* Move this bank and the ones after it up by one slot to make room
-
for the bank that is split off. */
-
memmove(bank + 1, bank,
-
(meminfo.nr_banks - i) * sizeof(*bank));
-
meminfo.nr_banks++;
-
i++;
-
/* bank[1] becomes the upper part: it starts at vmalloc_min and is highmem. */
-
bank[1].size -= vmalloc_min - __va(bank->start);
-
bank[1].start = __pa(vmalloc_min - 1) + 1;
-
bank[1].highmem = highmem = 1;
-
j++;
-
}
-
/* The lowmem part keeps its start; only its size is cut at vmalloc_min. */
-
bank->size = vmalloc_min - __va(bank->start);
-
}
-
#else
-
bank->highmem = highmem;
-
/* Without CONFIG_HIGHMEM a highmem bank is simply ignored. */
-
/*
-
* Highmem banks not allowed with !CONFIG_HIGHMEM.
-
*/
-
if (highmem) {
-
printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
-
"(!CONFIG_HIGHMEM).\n",
-
(unsigned long long)bank->start,
-
(unsigned long long)bank->start + bank->size - 1);
-
continue;
-
}
-
/* Does the bank start at or above vmalloc_min, or below PAGE_OFFSET? */
-
/*
-
* Check whether this memory bank would entirely overlap
-
* the vmalloc area.
-
*/
-
if (__va(bank->start) >= vmalloc_min ||
-
__va(bank->start) < (void *)PAGE_OFFSET) {
-
printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
-
"(vmalloc region overlap).\n",
-
(unsigned long long)bank->start,
-
(unsigned long long)bank->start + bank->size - 1);
-
continue;
-
}
-
/* Does the bank end above vmalloc_min (or wrap around)? Then truncate it. */
-
/*
-
* Check whether this memory bank would partially overlap
-
* the vmalloc area.
-
*/
-
if (__va(bank->start + bank->size) > vmalloc_min ||
-
__va(bank->start + bank->size) < __va(bank->start)) {
-
unsigned long newsize = vmalloc_min - __va(bank->start);
-
printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
-
"to -%.8llx (vmalloc region overlap).\n",
-
(unsigned long long)bank->start,
-
(unsigned long long)bank->start + bank->size - 1,
-
(unsigned long long)bank->start + newsize - 1);
-
bank->size = newsize;
-
}
-
#endif
-
/* Raise arm_lowmem_limit when this lowmem bank ends above the current value. */
-
if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit)
-
arm_lowmem_limit = bank->start + bank->size;
-
-
j++;
-
}
-
#ifdef CONFIG_HIGHMEM
-
if (highmem) {
-
const char *reason = NULL;
-
/* With a VIPT aliasing cache highmem is not used: the trailing highmem
-
banks are dropped below. */
-
if (cache_is_vipt_aliasing()) {
-
/*
-
* Interactions between kmap and other mappings
-
* make highmem support with aliasing VIPT caches
-
* rather difficult.
-
*/
-
reason = "with VIPT aliasing cache";
-
}
-
if (reason) {
-
printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
-
reason);
-
while (j > 0 && meminfo.bank[j - 1].highmem)
-
j--;
-
}
-
}
-
#endif
-
meminfo.nr_banks = j;
-
/* Everything above arm_lowmem_limit is treated as high memory. */
-
high_memory = __va(arm_lowmem_limit - 1) + 1;
-
memblock_set_current_limit(arm_lowmem_limit);
-
}
vmalloc_min在编译的时候就被静态初始化了:
-
/*
 * Default lower bound of the vmalloc area: VMALLOC_END minus the
 * VMALLOC_OFFSET gap and minus 240MB (240 << 20 bytes).
 */
static void * __initdata vmalloc_min =
	(void *)(VMALLOC_END - VMALLOC_OFFSET - (240 << 20));
-
/*
 * Default lower bound of the vmalloc area: VMALLOC_END minus the
 * VMALLOC_OFFSET gap and minus 240MB (240 << 20 bytes).
 */
static void * __initdata vmalloc_min =
	(void *)(VMALLOC_END - VMALLOC_OFFSET - (240 << 20));
VMALLOC_END:表示vmalloc区域结束地址。
240<<20:vmalloc区域有240M大小。
VMALLOC_OFFSET:为8M。vmalloc区域和lowmem区域有8M的空闲区间,防止访问越界。
当然,vmalloc_min也可以通过cmdline的方式传到kernel中作修改。
-
static int __init early_vmalloc(char *arg)
-
{
-
/*将vmalloc size解析成unsigned long类型。*/
-
unsigned long vmalloc_reserve = memparse(arg, NULL);
-
-
if (vmalloc_reserve < SZ_16M) {
-
vmalloc_reserve = SZ_16M;
-
printk(KERN_WARNING
-
"vmalloc area too small, limiting to %luMB\n",
-
vmalloc_reserve >> 20);
-
}
-
-
if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
-
vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
-
printk(KERN_WARNING
-
"vmalloc area is too big, limiting to %luMB\n",
-
vmalloc_reserve >> 20);
-
}
-
/*改变vmalloc_min变量,这样就得到了自己想要的vmalloc size了。*/
-
vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve);
-
return 0;
-
}
-
early_param("vmalloc", early_vmalloc);
-
static int __init early_vmalloc(char *arg)
-
{
-
/*将vmalloc size解析成unsigned long类型。*/
-
unsigned long vmalloc_reserve = memparse(arg, NULL);
-
-
if (vmalloc_reserve < SZ_16M) {
-
vmalloc_reserve = SZ_16M;
-
printk(KERN_WARNING
-
"vmalloc area too small, limiting to %luMB\n",
-
vmalloc_reserve >> 20);
-
}
-
-
if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
-
vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
-
printk(KERN_WARNING
-
"vmalloc area is too big, limiting to %luMB\n",
-
vmalloc_reserve >> 20);
-
}
-
/*改变vmalloc_min变量,这样就得到了自己想要的vmalloc size了。*/
-
vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve);
-
return 0;
-
}
-
early_param("vmalloc", early_vmalloc);
vmalloc_min的变化也会导致lowmem也就是低端的内存大小的变化。所以实际应用中,high memory的定义并非一定像书上所说的为896M之上。
Meminfo使用:
做完了检查之后就是使用了,在使用部分,meminfo的信息其实都传给了一个叫struct memblock的结构,后续由它来完成内存区域信息保存的责任。它会将一些必要的区域给保留出来供系统使用,例如kernel的text、data等段。其他未被保留的部分系统才能使用。来看看实现函数arm_memblock_init().
-
void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
-
{
-
int i;
-
/*将struct meminfo的信息都放入到了struct memblock中去,它会将保留的区域和空闲的区域用memory 和reserved变量分别保存。*/
-
for (i = 0; i < mi->nr_banks; i++)
-
memblock_add(mi->bank[i].start, mi->bank[i].size);
-
-
/* kernel的text段需要作为保留部分。其实看system.map会发现
-
_stext为symbol里的其实地址,而_end为结束地址。所以这块memblock
-
Region包括了virtual memory layout中的.init, .bss, .data, .text这几个区域。*/
-
memblock_reserve(__pa(_stext), _end - _stext);
-
/* 本平台的phys_initrd_start 这里为0.*/
-
#ifdef CONFIG_BLK_DEV_INITRD
-
if (phys_initrd_size &&
-
!memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) {
-
pr_err("INITRD: 0x%08lx+0x%08lx is not a memory region - disabling initrd\n",
-
phys_initrd_start, phy
阅读(1880) | 评论(0) | 转发(0) |