浅析linux内核内存管理之物理内存探测
作者:李万鹏
转载自:http://tomhibolu.iteye.com/blog/1214876
在系统boot的时候,kernel通过BIOS的0x15中断获得机器内存容量。该中断有三种相关的功能号:88H(最多只能探测64MB的内存),
E801H(得到内存大小),E820H(获得memory map)。这个memory map称为E820图,在kernel的初始化代码中会将这个memory
map复制到一个kernel中的数据结构e820map里,kernel需要通过这个结构来计算可用的内存容量。
-
/* In-kernel copy of the BIOS-provided E820 memory map. */
struct e820map {
-
/* Number of entries currently used in map[]. */
int nr_map;
-
/* One memory region reported by the BIOS. */
struct e820entry {
-
/* Start address of the region. */
unsigned long long addr;
-
/* Size of the region. */
unsigned long long size;
-
/* Region type, e.g. E820_RAM for usable memory. */
unsigned long type;
-
/* At most E820MAX regions can be recorded. */
} map[E820MAX];
-
};
-
这里的nr_map是内存段的数量
-
每个内存段由struct e820entry表示
-
addr字段表示内存段的起始地址
-
size字段表示内存段的大小
-
type表示内存段的类型,比如E820_RAM表示可用内存
-
E820MAX是一个宏,为32,说明最多可以有32个内存段
在setup_arch函数中有这么两句,调用machine_specific_memory_setup将E820图复制到kernel中的数据结构中,包括了系统保留的段和空闲段,再通过print_memory_map函数打印出来。
-
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
-
print_memory_map(machine_specific_memory_setup());
下面来看machine_specific_memory_setup函数的实现:
-
/*
 * Fill the kernel's e820 map from the BIOS-supplied data.
 *
 * Returns a string naming the source that was actually used:
 * "BIOS-e820", "BIOS-88" or "BIOS-e801".
 */
static char * __init machine_specific_memory_setup(void)
-
{
-
char *who;
-
-
-
who = "BIOS-e820";
-
-
-
-
-
-
-
-
/* NOTE(review): per the accompanying article, this removes overlapping entries. */
sanitize_e820_map(E820_MAP, &E820_MAP_NR);
-
/* A negative return means the BIOS map is unusable: build a fake map instead. */
if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
-
unsigned long mem_size;
-
-
-
/* Use whichever of the two legacy size values is larger. */
if (ALT_MEM_K < EXT_MEM_K) {
-
mem_size = EXT_MEM_K;
-
who = "BIOS-88";
-
} else {
-
mem_size = ALT_MEM_K;
-
who = "BIOS-e801";
-
}
-
-
/* Drop any entries copy_e820_map() added before it failed. */
e820.nr_map = 0;
-
/* Region 1: from address 0, LOWMEMSIZE() bytes long. */
add_memory_region(0, LOWMEMSIZE(), E820_RAM);
-
/* Region 2: from HIGH_MEMORY; the << 10 presumably converts KB to bytes (confirm). */
add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
-
}
-
return who;
-
}
-
首先调用sanitize_e820_map函数将重叠的去除
-
调用copy_e820_map函数将E820图copy到struct e820map结构中
-
如果BIOS没有提供该信息(在较古老的机器上可能是这样),即copy_e820_map返回负值,内核会自己生成一个只有两段的表:0~0x9f000,以及从1MB开始、大小取E801H与88H两种方式探测结果中较大值的一段
看copy_e820_map函数实现:
-
/*
 * Copy nr_map entries from biosmap into the kernel map through
 * add_memory_region().
 *
 * Returns 0 on success, or -1 when the map has fewer than two entries
 * or an entry's end address is below its start address.
 */
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
-
{
-
-
/* A map with fewer than two entries is treated as invalid. */
if (nr_map < 2)
-
return -1;
-
-
do {
-
unsigned long long start = biosmap->addr;
-
unsigned long long size = biosmap->size;
-
unsigned long long end = start + size;
-
unsigned long type = biosmap->type;
-
-
-
/* start + size wrapped around 64 bits: reject the whole map. */
if (start > end)
-
return -1;
-
-
-
-
-
-
/*
 * Usable RAM that overlaps 0xA0000-0x100000 is split so that
 * this range is never added as E820_RAM.
 */
if (type == E820_RAM) {
-
if (start < 0x100000ULL && end > 0xA0000ULL) {
-
/* Keep the part below 0xA0000. */
if (start < 0xA0000ULL)
-
add_memory_region(start, 0xA0000ULL-start, type);
-
/*
 * Nothing left at or above 0x100000. In a do-while, continue jumps
 * to the loop condition, so biosmap and nr_map still advance.
 */
if (end <= 0x100000ULL)
-
continue;
-
/* Keep the part starting at 0x100000. */
start = 0x100000ULL;
-
size = end - start;
-
}
-
}
-
add_memory_region(start, size, type);
-
} while (biosmap++,--nr_map);
-
return 0;
-
}
-
BIOS保留区域与RAM至少不在同一个内存段中,所以一张有效的E820图至少应有2项;nr_map < 2说明该图无效,函数直接返回-1
-
调用add_memory_region函数将E820图填充到struct e820map结构中
-
如果类型为E820_RAM,即可用内存,判断这个范围是否覆盖
640KB~1MB,这段需要为ISA图形卡等保留的,所以这段要保留,如果谁覆盖了这段需要把这段抠除。物理地址从0x000a0000到
0x000fffff的范围通常留给BIOS例程,并且映射ISA图形卡上的内部内存。这个区域就是所有的IBM兼容PC上从640KB~1MB之间著名
的空洞:物理地址存在但被保留,对应的页框不能由操作系统使用。
调用add_memory_region添加相应的内存段到e820map:
-
/*
 * Append one region (start, size, type) to the global e820 map.
 *
 * Does nothing when efi_enabled is set. If the map already holds
 * E820MAX entries, an error is logged and the region is dropped.
 */
static void __init add_memory_region(unsigned long long start,
-
unsigned long long size, int type)
-
{
-
int x;
-
-
if (!efi_enabled) {
-
/* Index of the next free slot. */
x = e820.nr_map;
-
-
/* Map is full: report it and discard this region. */
if (x == E820MAX) {
-
printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
-
return;
-
}
-
-
e820.map[x].addr = start;
-
e820.map[x].size = size;
-
e820.map[x].type = type;
-
e820.nr_map++;
-
}
-
}
如果内存段数量已经达到最大值E820MAX(即32),则打印一条错误信息并直接返回,这个内存段被丢弃("Ooops"只是提示信息中的字样,并不会触发真正的内核oops)。另外,如果efi_enabled为真,该函数什么也不做。
-
/*
 * Print every entry of the global e820 map, one line each.
 *
 * who: label printed at the start of each line, naming the source of the map.
 */
static void __init print_memory_map(char *who)
-
{
-
int i;
-
-
for (i = 0; i < e820.nr_map; i++) {
-
/* Range is printed as start address - end address (addr + size). */
printk(" %s: %016Lx - %016Lx ", who,
-
e820.map[i].addr,
-
e820.map[i].addr + e820.map[i].size);
-
/* Append a readable label for the region type. */
switch (e820.map[i].type) {
-
case E820_RAM: printk("(usable)\n");
-
break;
-
case E820_RESERVED:
-
printk("(reserved)\n");
-
break;
-
case E820_ACPI:
-
printk("(ACPI data)\n");
-
break;
-
case E820_NVS:
-
printk("(ACPI NVS)\n");
-
break;
-
/* Unknown types are printed numerically. */
default: printk("type %lu\n", e820.map[i].type);
-
break;
-
}
-
}
-
}
调用print_memory_map打印出各个内存段的范围和类型,我的内存是2G的,打印结果如下:
-
[ 0.000000] BIOS-provided physical RAM map:
-
[ 0.000000] BIOS-e820: 0000000000000000 - 000000000009f000 (usable)
-
[ 0.000000] BIOS-e820: 000000000009f000 - 00000000000a0000 (reserved)
-
[ 0.000000] BIOS-e820: 00000000000f0000 - 0000000000100000 (reserved)
-
[ 0.000000] BIOS-e820: 0000000000100000 - 0000000001e00000 (usable)
-
[ 0.000000] BIOS-e820: 0000000001e00000 - 0000000001e80040 (reserved)
-
[ 0.000000] BIOS-e820: 0000000001e80040 - 000000007bed0000 (usable)
-
[ 0.000000] BIOS-e820: 000000007bed0000 - 000000007bed3000 (ACPI NVS)
-
[ 0.000000] BIOS-e820: 000000007bed3000 - 000000007bee0000 (ACPI data)
-
[ 0.000000] BIOS-e820: 000000007bee0000 - 000000007bf00000 (reserved)
-
[ 0.000000] BIOS-e820: 000000007c000000 - 0000000080000000 (reserved)
-
[ 0.000000] BIOS-e820: 00000000f0000000 - 00000000f4000000 (reserved)
-
[ 0.000000] BIOS-e820: 00000000fec00000 - 0000000100000000 (reserved)
至此,kernel已经成功地通过0x15中断的E820H功能,得到BIOS的E820图,并将其填充到内核中的e820map结构,供内核其他部分使用。
在setup_memory函数中会调用find_max_pfn,从e820map结构中获得可用内存的容量。下面来看几个宏:
-
/* Address -> page frame number, rounding up to the next page boundary. */
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-
/* Address -> page frame number, rounding down. */
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
-
/* Page frame number -> address of the start of that page. */
#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
-
-
-
-
-
/* NOTE(review): per the accompanying article, the maximum size of low memory. */
#define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE)
-
/* MAXMEM expressed as a page frame number. */
#define MAXMEM_PFN PFN_DOWN(MAXMEM)
-
/* 2^20 page frames; with 4KB pages this is the 4GB boundary (page size assumed, confirm). */
#define MAX_NONPAE_PFN (1 << 20)
-
PFN_UP、PFN_DOWN都是把地址x转换为页帧号:PFN_UP向上取整(x未按页对齐时返回下一个页的页帧号,x已按页对齐时返回x所在页的页帧号),PFN_DOWN向下取整,返回x所在页的页帧号
-
PFN_PHYS获得页帧号对应的物理地址
-
MAXMEM是低端内存的最大值
-
MAXMEM_PFN是低端内存最大一个页的页帧号
-
MAX_NONPAE_PFN的值为1 << 20,在页大小为4KB时对应物理地址4GB处的页帧号,即不开启PAE时可使用的页帧数上限
setup_memory是与体系结构密切相关的函数,跟踪其实现:
-
static unsigned long __init setup_memory(void)
-
{
-
unsigned long bootmap_size, start_pfn, max_low_pfn;
-
-
-
-
-
-
start_pfn = PFN_UP(init_pg_tables_end);
-
-
find_max_pfn();
-
-
max_low_pfn = find_max_low_pfn();
-
-
#ifdef CONFIG_HIGHMEM
-
highstart_pfn = highend_pfn = max_pfn;
-
if (max_pfn > max_low_pfn) {
-
highstart_pfn = max_low_pfn;
-
}
-
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
-
pages_to_mb(highend_pfn - highstart_pfn));
-
#endif
-
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
-
pages_to_mb(max_low_pfn));
-
-
-
-
bootmap_size = init_bootmem(start_pfn, max_low_pfn);
-
-
register_bootmem_low_pages(max_low_pfn);
-
-
-
-
-
-
-
-
reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
-
bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
-
-
-
-
-
-
reserve_bootmem(0, PAGE_SIZE);
-
-
。。。。。。。。。。。。
-
return max_low_pfn;
-
}
-
用PFN_UP(init_pg_tables_end)把init_pg_tables_end向上取整到页边界,得到其后第一个完整page的页帧号,用来初始化start_pfn
-
调用find_max_low_pfn获得低端内存的最大页帧号
-
如果配置了CONFIG_HIGHMEM,则初始化highstart_pfn变量
-
调用init_bootmem初始化bootmem allocator
-
将max_low_pfn,即直接内存映射部分的page设置为可用
-
保留内核镜像(从0x100000开始,kernel code, kernel data, kernel bss),page 0,bootmem allocator的bitmap占用的空间
-
然后返回低端内存的最大页帧号
下面来看查找最大内存的函数:
-
/*
 * Set the global max_pfn to the highest page frame number that ends
 * an E820_RAM region in the e820 map. When efi_enabled is set, the
 * value is computed by walking the EFI memory map instead.
 */
void __init find_max_pfn(void)
-
{
-
int i;
-
-
max_pfn = 0;
-
if (efi_enabled) {
-
efi_memmap_walk(efi_find_max_pfn, &max_pfn);
-
return;
-
}
-
-
for (i = 0; i < e820.nr_map; i++) {
-
unsigned long start, end;
-
-
/* Only usable RAM counts. */
if (e820.map[i].type != E820_RAM)
-
continue;
-
/* Round start up and end down so only whole pages are considered. */
start = PFN_UP(e820.map[i].addr);
-
end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
-
/* The region does not contain a single whole page. */
if (start >= end)
-
continue;
-
if (end > max_pfn)
-
max_pfn = end;
-
}
-
}
-
/*
 * Work out the upper page frame number of low memory from max_pfn.
 *
 * Returns max_low_pfn, which is never above MAXMEM_PFN. The globals
 * max_pfn and highmem_pages may be adjusted along the way.
 * NOTE(review): highmem_pages == -1 appears to mean "not specified" - confirm.
 */
unsigned long __init find_max_low_pfn(void)
-
{
-
unsigned long max_low_pfn;
-
-
max_low_pfn = max_pfn;
-
/* Case 1: more memory than low memory can cover. */
if (max_low_pfn > MAXMEM_PFN) {
-
/* Default: everything above MAXMEM_PFN is high memory. */
if (highmem_pages == -1)
-
highmem_pages = max_pfn - MAXMEM_PFN;
-
/* A smaller highmem size was given: shrink max_pfn to match it. */
if (highmem_pages + MAXMEM_PFN < max_pfn)
-
max_pfn = MAXMEM_PFN + highmem_pages;
-
/* A larger highmem size than is available was given: ignore it. */
if (highmem_pages + MAXMEM_PFN > max_pfn) {
-
printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
-
highmem_pages = 0;
-
}
-
max_low_pfn = MAXMEM_PFN;
-
#ifndef CONFIG_HIGHMEM
-
-
/* Without highmem support, memory above MAXMEM is unusable: warn and clamp max_pfn. */
printk(KERN_WARNING "Warning only %ldMB will be used.\n",
-
MAXMEM>>20);
-
if (max_pfn > MAX_NONPAE_PFN)
-
printk(KERN_WARNING "Use a PAE enabled kernel.\n");
-
else
-
printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
-
max_pfn = MAXMEM_PFN;
-
#else /* !CONFIG_HIGHMEM */
-
#ifndef CONFIG_X86_PAE
-
/* Highmem without PAE: clamp max_pfn to MAX_NONPAE_PFN. */
if (max_pfn > MAX_NONPAE_PFN) {
-
max_pfn = MAX_NONPAE_PFN;
-
printk(KERN_WARNING "Warning only 4GB will be used.\n");
-
printk(KERN_WARNING "Use a PAE enabled kernel.\n");
-
}
-
#endif /* !CONFIG_X86_PAE */
-
#endif /* !CONFIG_HIGHMEM */
-
/* Case 2: all memory fits below MAXMEM_PFN. */
} else {
-
if (highmem_pages == -1)
-
highmem_pages = 0;
-
#ifdef CONFIG_HIGHMEM
-
/* The requested highmem size is not smaller than all available memory: ignore it. */
if (highmem_pages >= max_pfn) {
-
printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
-
highmem_pages = 0;
-
}
-
if (highmem_pages) {
-
/* Refuse a highmem size that would leave less than 64MB of low memory. */
if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
-
printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
-
highmem_pages = 0;
-
}
-
/* Take the requested highmem pages off the top of low memory. */
max_low_pfn -= highmem_pages;
-
}
-
#else
-
if (highmem_pages)
-
printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
-
#endif
-
}
-
return max_low_pfn;
-
}
-
这里分两种情况进行讨论,一个是内存大于896MB,一个是内存小于896MB
-
max_low_pfn > MAXMEM_PFN下的#ifndef CONFIG_HIGHMEM会设置max_pfn = MAXMEM_PFN;看出如果不开启highmem,即使内存大于896MB,也只能使用896MB
阅读(2052) | 评论(0) | 转发(0) |