Bootmem分配器使用位图来管理页,位图中的位数和系统的物理内存页数量是相同的。当页被使用时,对应的位就标记为1,否则为0,表示空闲页。
由于该分配器管理机制比较简单,并没有考虑性能和通用性,所以在伙伴系统完成初始化之后,bootmem分配器就要交出管理权,然后销毁掉。
在UMA系统上,只有一个bootmem allocator,名字叫bootmem_node_data,位于kernel/mm/bootmem.c中,它通过成为struct pglist_data的一个元素(bdata)与变量contig_page_data联系起来。
-
struct pglist_data __refdata contig_page_data = {
-
.bdata = &bootmem_node_data[0]
-
};
-
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
Bootmem 初始化:
系统启动的时候有如下流程:
start_kernel -> setup_arch -> paging_init-> bootmem_init
-
/*
 * Bring up the bootmem allocator: determine the pfn limits, initialise
 * bootmem for lowmem, set up sparsemem, hand the ranges to
 * arm_bootmem_free() and finally record max_low_pfn / max_pfn.
 */
void __init bootmem_init(void)
{
	unsigned long min, max_low, max_high;

	max_low = max_high = 0;

	/*
	 * min:      pfn of the start of physical memory
	 * max_low:  pfn of the end of low memory
	 * max_high: pfn of the end of high memory
	 */
	find_limits(&min, &max_low, &max_high);

	/* Judging by the arguments, only the lowmem range is initialised here. */
	arm_bootmem_init(min, max_low);

	/*
	 * Sparsemem tries to allocate bootmem in memory_present(),
	 * so must be done after the fixed reservations
	 */
	arm_memory_present();

	/*
	 * sparse_init() needs the bootmem allocator up and running.
	 */
	sparse_init();

	/*
	 * Now free the memory - free_area_init_node needs
	 * the sparse mem_map arrays initialized by sparse_init()
	 * for memmap_init_zone(), otherwise all PFNs are invalid.
	 */
	arm_bootmem_free(min, max_low, max_high);

	/*
	 * Record the lowmem and highmem limits relative to PHYS_PFN_OFFSET.
	 * These are NOT the actual pfn numbers that can be operated on,
	 * because the first pfn does not necessarily start at 0.
	 */
	max_low_pfn = max_low - PHYS_PFN_OFFSET;
	max_pfn = max_high - PHYS_PFN_OFFSET;
}
find_limits():
里面有些函数需要展开来分析下,先看find_limits:
-
/*
 * Compute the pfn limits of the memory described by meminfo.
 *
 * @min:      receives the start pfn of bank 0
 * @max_low:  receives the end pfn of the last bank before the first
 *            highmem bank
 * @max_high: receives the end pfn of the last bank
 */
static void __init find_limits(unsigned long *min, unsigned long *max_low,
			       unsigned long *max_high)
{
	struct meminfo *mi = &meminfo;
	int i;

	/* This assumes the meminfo array is properly sorted */
	*min = bank_pfn_start(&mi->bank[0]);

	/* Advance until the first highmem bank (or the end of the banks). */
	for_each_bank (i, mi)
		if (mi->bank[i].highmem)
			break;

	/*
	 * End pfns of lowmem and highmem.
	 * NOTE: bank[i - 1] relies on bank 0 not being highmem; with i == 0
	 * this would index before the start of the array.
	 */
	*max_low = bank_pfn_end(&mi->bank[i - 1]);
	*max_high = bank_pfn_end(&mi->bank[mi->nr_banks - 1]);
}
函数比较简单,不过要注意的是,这里获得的内存是内核当前拥有的memory,当然也包含了已经被reserved的区域。arm_bootmem_init()会重新划分。
arm_bootmem_init():
arm_bootmem_init()是核心的函数。
-
/*
 * Initialise the node-0 bootmem allocator for the pfn range
 * [start_pfn, end_pfn): allocate its bitmap from memblock, initialise the
 * allocator, then mirror memblock's memory/reserved regions into the
 * bitmap via free_bootmem() and reserve_bootmem().
 */
static void __init arm_bootmem_init(unsigned long start_pfn,
	unsigned long end_pfn)
{
	struct memblock_region *reg;
	unsigned int boot_pages;
	phys_addr_t bitmap;
	pg_data_t *pgdat;

	/*
	 * Allocate the bootmem bitmap page.  This must be in a region
	 * of memory which has already been mapped.
	 */
	/* end_pfn - start_pfn is the number of lowmem page frames. */
	boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn);

	/*
	 * Allocate the bitmap for that many pfns, aligned to L1_CACHE_BYTES
	 * (presumably for cache-friendly access) and located below end_pfn.
	 */
	bitmap = memblock_alloc_base(boot_pages << PAGE_SHIFT, L1_CACHE_BYTES,
				__pfn_to_phys(end_pfn));

	/*
	 * Initialise the bootmem allocator, handing the
	 * memory banks over to bootmem.
	 */
	node_set_online(0);
	pgdat = NODE_DATA(0);

	/*
	 * Initialise pgdat->bdata. Per the article, pgdat is the global
	 * contig_page_data and its bdata is bootmem_node_data.
	 */
	init_bootmem_node(pgdat, __phys_to_pfn(bitmap), start_pfn, end_pfn);

	/*
	 * As noted in the earlier meminfo discussion, the "memory" member of
	 * struct memblock describes available memory regions and "reserved"
	 * describes regions to keep. The loops below therefore mark reserved
	 * regions as 1 in the bitmap and free memory as 0.
	 */
	/* Free the lowmem regions from memblock into bootmem. */
	for_each_memblock(memory, reg) {
		unsigned long start = memblock_region_memory_base_pfn(reg);
		unsigned long end = memblock_region_memory_end_pfn(reg);

		/* Clip the region to the lowmem limit. */
		if (end >= end_pfn)
			end = end_pfn;
		if (start >= end)
			break;

		free_bootmem(__pfn_to_phys(start), (end - start) << PAGE_SHIFT);
	}

	/* Reserve the lowmem memblock reserved regions in bootmem. */
	for_each_memblock(reserved, reg) {
		unsigned long start = memblock_region_reserved_base_pfn(reg);
		unsigned long end = memblock_region_reserved_end_pfn(reg);

		/* Clip the region to the lowmem limit. */
		if (end >= end_pfn)
			end = end_pfn;
		if (start >= end)
			break;

		reserve_bootmem(__pfn_to_phys(start),
				(end - start) << PAGE_SHIFT, BOOTMEM_DEFAULT);
	}
}
bootmem_bootmap_pages():
继续分解函数,先看bootmem_bootmap_pages().
-
/*
 * Return the number of whole pages needed to hold the bootmem bitmap
 * covering @pages page frames.
 */
unsigned long __init bootmem_bootmap_pages(unsigned long pages)
{
	unsigned long bytes = bootmap_bytes(pages);

	/* Round the byte count up to a page multiple and convert to pages. */
	return PAGE_ALIGN(bytes) >> PAGE_SHIFT;
}
-
/*
 * Return the size in bytes of a bitmap holding one bit per page for
 * @pages pages, rounded up to a multiple of sizeof(long).
 */
static unsigned long __init bootmap_bytes(unsigned long pages)
{
	/* One bit per page, eight pages per byte. */
	unsigned long bytes = DIV_ROUND_UP(pages, 8);

	return ALIGN(bytes, sizeof(long));
}
memblock_alloc_base():
-
/*
 * Allocate @size bytes aligned to @align below @max_addr.
 * Panics if __memblock_alloc_base() returns 0; otherwise returns the
 * allocated physical address.
 */
phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
	phys_addr_t alloc;

	alloc = __memblock_alloc_base(size, align, max_addr);

	/* 0 is the failure value of the underlying allocator. */
	if (alloc == 0)
		panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
		      (unsigned long long) size, (unsigned long long) max_addr);

	return alloc;
}
-
/*
 * Non-panicking variant: forwards to memblock_alloc_base_nid() with
 * MAX_NUMNODES as the node id and returns its result (0 on failure).
 */
phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
	return memblock_alloc_base_nid(size, align, max_addr, MAX_NUMNODES);
}
-
/*
 * Find a free range of @size bytes (rounded up to @align) below @max_addr
 * for node @nid and reserve it.
 *
 * Returns the physical address of the range, or 0 if no range was found
 * or memblock_reserve() returned non-zero.
 */
static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
					phys_addr_t align, phys_addr_t max_addr,
					int nid)
{
	phys_addr_t found;

	/* The search and the reservation both use the rounded-up size. */
	size = round_up(size, align);

	found = memblock_find_in_range_node(0, max_addr, size, align, nid);
	if (found && !memblock_reserve(found, size))
		return found;

	return 0;
}
-
/*
 * Search the free memory ranges of node @nid for @size bytes aligned to
 * @align inside [@start, @end].
 *
 * Returns the candidate physical address, or 0 if nothing fits.
 */
phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
					phys_addr_t end, phys_addr_t size,
					phys_addr_t align, int nid)
{
	phys_addr_t this_start, this_end, cand;
	u64 i;

	/* avoid allocating the first page */
	/* (Open question from the article: what is the first page kept for?) */
	start = max_t(phys_addr_t, start, PAGE_SIZE);
	end = max(start, end);

	/*
	 * Walk the free ranges in reverse and carve the block from the top
	 * of a range, i.e. allocate from the highest address downwards.
	 */
	for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
		/* Clamp both ends of this free range into [start, end]. */
		this_start = clamp(this_start, start, end);
		this_end = clamp(this_end, start, end);

		/* Skip ranges where this_end - size would underflow. */
		if (this_end < size)
			continue;

		/* Candidate address: @size bytes below this_end, aligned down. */
		cand = round_down(this_end - size, align);
		if (cand >= this_start)
			return cand;
	}

	return 0;
}
init_bootmem_node():
-
/*
 * Initialise the bootmem allocator of node @pgdat.
 *
 * @freepfn:  pfn at which the bitmap is placed
 * @startpfn: first pfn managed
 * @endpfn:   end pfn of the managed range
 *
 * Returns whatever init_bootmem_core() returns (the bitmap size in bytes).
 */
unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
				unsigned long startpfn, unsigned long endpfn)
{
	return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
}
-
/*
 * Fill in @bdata for the pfn range [@start, @end) with its bitmap located
 * at pfn @mapstart, and mark every page as reserved.
 *
 * Returns the bitmap size in bytes.
 */
static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
	unsigned long mapstart, unsigned long start, unsigned long end)
{
	unsigned long mapsize;

	mminit_validate_memmodel_limits(&start, &end);

	/* Record the bitmap's virtual address and the min/max pfn. */
	bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
	bdata->node_min_pfn = start;
	bdata->node_low_pfn = end;

	/* Per the article: adds bdata to the global bdata_list. */
	link_bootmem(bdata);

	/*
	 * Initially all pages are reserved - setup_arch() has to
	 * register free RAM areas explicitly.
	 */
	/*
	 * Every bit in the bitmap is set to "in use" here; the
	 * for_each_memblock() loops in arm_bootmem_init() then adjust them.
	 */
	mapsize = bootmap_bytes(end - start);
	memset(bdata->node_bootmem_map, 0xff, mapsize);

	bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
		bdata - bootmem_node_data, start, mapstart, end, mapsize);

	return mapsize;
}
free_bootmem()/reserve_bootmem():
这两个函数比较简单了,表示将bootmem的页分别标记成空闲和使用中。
到此,bootmem allocator已经初始化完成。
Bootmem内存分配:
Bootmem的分配有多种接口,不过最终调用的都是__alloc_bootmem(),而__alloc_bootmem()经由___alloc_bootmem()调用了___alloc_bootmem_nopanic()。
路径: kernel/kernel/include/linux/bootmem.h
-
/*
 * Convenience wrappers around the __alloc_bootmem*() family.
 * The zone remarks below are the article's annotations.
 */

/* Allocate x bytes from the ZONE_NORMAL area. */
#define alloc_bootmem(x) \
	__alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)

/* Allocate x bytes from the ZONE_NORMAL area, aligned to align. */
#define alloc_bootmem_align(x, align) \
	__alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT)

/* Allocate x bytes from the ZONE_NORMAL area, aligned to one page. */
#define alloc_bootmem_pages(x) \
	__alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)

/*
 * SMP_CACHE_BYTES alignment lets the data make better use of the L1 cache,
 * despite the SMP prefix in the name.
 */
#define alloc_bootmem_nopanic(x) \
	__alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)

/* The _node variants are meant for NUMA systems only. */
#define alloc_bootmem_node(pgdat, x) \
	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_node_nopanic(pgdat, x) \
	__alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_pages_node(pgdat, x) \
	__alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_pages_node_nopanic(pgdat, x) \
	__alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)

/* Unlike the macros above, these allocate from the ZONE_DMA area. */
#define alloc_bootmem_low(x) \
	__alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
#define alloc_bootmem_low_pages(x) \
	__alloc_bootmem_low(x, PAGE_SIZE, 0)
#define alloc_bootmem_low_pages_node(pgdat, x) \
	__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
关于内存的分配,请允许我偷懒下,不对代码做详细分析了,有点费时间,流程如下:
__alloc_bootmem -> ___alloc_bootmem -> ___alloc_bootmem_nopanic -> alloc_bootmem_core -> find_next_zero_bit
大概的步骤就是:
1. 扫描bitmap位图,寻找空闲的位
2. 如果查找的页紧挨着上一次分配的页,就先检查这次要分配的内存是否能在上一页直接分配,因为bootmem allocator支持小于一页的分配。
3. 在新分配的页的bitmap对应的bit设置为1后,将当前的偏移保存,如果页没有完全分配,那么页里面的偏移量也保存。
Bootmem内存释放:
内核提供的释放bootmem接口是free_bootmem(unsigned long addr, unsigned long size),还有一个是用于NUMA的。
这个接口没有分析,据资料记载说分配页可能会有风险。
Bootmem停用:
一旦伙伴系统初始化完成之后,bootmem allocator就要停止使用了,系统是通过函数free_all_bootmem()来停止的,有如下调用流程:
start_kernel -> mm_init -> mem_init -> free_all_bootmem -> free_all_bootmem_core -> __free_pages_bootmem -> __free_pages
可以看到最终调用的是__free_pages(),这个函数会将这些Pages释放到伙伴系统中管理。
注意这里只是将空闲的页释放掉了。占据的页还存在。
由于bootmem分配的页里面的数据基本上都是用于内存基本结构,在系统运行期间会一直被用到,所以不会被释放。不过像__init这种类型的数据段只在系统开机的时候被使用,所以系统初始化完成之后就可以释放掉了。
系统使用的函数是free_initmem(), 调用流程如下:
start_kernel -> rest_init -> kernel_init -> init_post -> free_initmem
-
void free_initmem(void)
-
{
-
unsigned long reclaimed_initmem;
-
-
poison_init_mem(__init_begin, __init_end - __init_begin);
-
if (!machine_is_integrator() && !machine_is_cintegrator()) {
-
reclaimed_initmem = free_area(__phys_to_pfn(__pa(__init_begin)),
-
__phys_to_pfn(__pa(__init_end)),
-
"init");
-
totalram_pages += reclaimed_initmem;
-
}
-
}
可以看到,释放的数据就是保存在__init_begin和__init_end那一段之间!当然,最后也是调用__free_pages()释放到伙伴系统中管理的。
使用bootmem分配内存的时候需要注意下一定要处于bootmem 分配器初始化和销毁之间!
2013/03/21