前面已经分析了linux内存管理算法(伙伴管理算法)的准备工作。
具体的算法初始化则回到start_kernel()函数接着往下走,下一个函数是mm_init():
-
【file:/init/main.c】
-
/*
-
* Set up kernel memory allocators
-
*/
-
static void __init mm_init(void)
-
{
-
/*
-
* page_cgroup requires contiguous pages,
-
* bigger than MAX_ORDER unless SPARSEMEM.
-
*/
-
page_cgroup_init_flatmem();
-
mem_init();
-
kmem_cache_init();
-
percpu_init_late();
-
pgtable_init();
-
vmalloc_init();
-
}
乍看仅仅是几个函数的调用,实际上这里的事情远没有这么简单。其中page_cgroup_init_flatmem()与cgroup相关,mem_init()负责伙伴管理算法的初始化,kmem_cache_init()用于内核slub内存分配体系的初始化,而vmalloc_init()则用于vmalloc的初始化。
当前主要分析伙伴管理算法,因此仅对mem_init()做专门的分析,其余的暂且留到后面再分析。
伙伴管理算法的初始化函数入口是mem_init(),其实现:
-
【file:/arch/x86/mm/init_32.c】
-
void __init mem_init(void)
-
{
-
pci_iommu_alloc();
-
-
#ifdef CONFIG_FLATMEM
-
BUG_ON(!mem_map);
-
#endif
-
/*
-
* With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to
-
* be done before free_all_bootmem(). Memblock use free low memory for
-
* temporary data (see find_range_array()) and for this purpose can use
-
* pages that was already passed to the buddy allocator, hence marked as
-
* not accessible in the page tables when compiled with
-
* CONFIG_DEBUG_PAGEALLOC. Otherwise order of initialization is not
-
* important here.
-
*/
-
set_highmem_pages_init();
-
-
/* this will put all low memory onto the freelists */
-
free_all_bootmem();
-
-
after_bootmem = 1;
-
-
mem_init_print_info(NULL);
-
printk(KERN_INFO "virtual kernel memory layout:\n"
-
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
-
#ifdef CONFIG_HIGHMEM
-
" pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
-
#endif
-
" vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
-
" lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
-
" .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
-
" .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
-
" .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
-
FIXADDR_START, FIXADDR_TOP,
-
(FIXADDR_TOP - FIXADDR_START) >> 10,
-
-
#ifdef CONFIG_HIGHMEM
-
PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
-
(LAST_PKMAP*PAGE_SIZE) >> 10,
-
#endif
-
-
VMALLOC_START, VMALLOC_END,
-
(VMALLOC_END - VMALLOC_START) >> 20,
-
-
(unsigned long)__va(0), (unsigned long)high_memory,
-
((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
-
-
(unsigned long)&__init_begin, (unsigned long)&__init_end,
-
((unsigned long)&__init_end -
-
(unsigned long)&__init_begin) >> 10,
-
-
(unsigned long)&_etext, (unsigned long)&_edata,
-
((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
-
-
(unsigned long)&_text, (unsigned long)&_etext,
-
((unsigned long)&_etext - (unsigned long)&_text) >> 10);
-
-
/*
-
* Check boundaries twice: Some fundamental inconsistencies can
-
* be detected at build time already.
-
*/
-
#define __FIXADDR_TOP (-PAGE_SIZE)
-
#ifdef CONFIG_HIGHMEM
-
BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
-
BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE);
-
#endif
-
#define high_memory (-128UL << 20)
-
BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
-
#undef high_memory
-
#undef __FIXADDR_TOP
-
#ifdef CONFIG_RANDOMIZE_BASE
-
BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE);
-
#endif
-
-
#ifdef CONFIG_HIGHMEM
-
BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
-
BUG_ON(VMALLOC_END > PKMAP_BASE);
-
#endif
-
BUG_ON(VMALLOC_START >= VMALLOC_END);
-
BUG_ON((unsigned long)high_memory > VMALLOC_START);
-
-
if (boot_cpu_data.wp_works_ok < 0)
-
test_wp_bit();
-
}
其中pci_iommu_alloc()不是伙伴算法重点相关的函数,不过还是稍微记录一下:
-
【file:/arch/x86/kernel/pci-dma.c】
-
void __init pci_iommu_alloc(void)
-
{
-
struct iommu_table_entry *p;
-
-
sort_iommu_table(__iommu_table, __iommu_table_end);
-
check_iommu_entries(__iommu_table, __iommu_table_end);
-
-
for (p = __iommu_table; p < __iommu_table_end; p++) {
-
if (p && p->detect && p->detect() > 0) {
-
p->flags |= IOMMU_DETECTED;
-
if (p->early_init)
-
p->early_init();
-
if (p->flags & IOMMU_FINISH_IF_DETECTED)
-
break;
-
}
-
}
-
}
该函数主要是将iommu table先行排序检查,然后调用各个表项注册的函数进行初始化。
而接着的set_highmem_pages_init()则是伙伴算法的开始:
-
【file:/arch/x86/mm/highmem_32.c】
-
void __init set_highmem_pages_init(void)
-
{
-
struct zone *zone;
-
int nid;
-
-
/*
-
* Explicitly reset zone->managed_pages because set_highmem_pages_init()
-
* is invoked before free_all_bootmem()
-
*/
-
reset_all_zones_managed_pages();
-
for_each_zone(zone) {
-
unsigned long zone_start_pfn, zone_end_pfn;
-
-
if (!is_highmem(zone))
-
continue;
-
-
zone_start_pfn = zone->zone_start_pfn;
-
zone_end_pfn = zone_start_pfn + zone->spanned_pages;
-
-
nid = zone_to_nid(zone);
-
printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n",
-
zone->name, nid, zone_start_pfn, zone_end_pfn);
-
-
add_highpages_with_active_regions(nid, zone_start_pfn,
-
zone_end_pfn);
-
}
-
}
该函数中reset_all_zones_managed_pages()主要是将所有内存管理区zone的页面管理数据(managed_pages)清0重置。接下来的for_each_zone(zone)循环结合is_highmem(zone)判断,用于遍历查找出高端内存的管理区,对查找到的高端内存管理区则调用add_highpages_with_active_regions()将其页面释放并添加至伙伴管理算法中。
add_highpages_with_active_regions()具体实现:
-
【file:/arch/x86/mm/init_32.c】
-
void __init add_highpages_with_active_regions(int nid,
-
unsigned long start_pfn, unsigned long end_pfn)
-
{
-
phys_addr_t start, end;
-
u64 i;
-
-
for_each_free_mem_range(i, nid, &start, &end, NULL) {
-
unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
-
start_pfn, end_pfn);
-
unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
-
start_pfn, end_pfn);
-
for ( ; pfn < e_pfn; pfn++)
-
if (pfn_valid(pfn))
-
free_highmem_page(pfn_to_page(pfn));
-
}
-
}
其中for_each_free_mem_range(i, nid, &start, &end, NULL)用于遍历查找memblock算法中空闲的空间区域,然后通过clamp_t()将空闲区域的起止页框号限制在[start_pfn, end_pfn]范围内(即仅保留落在该高端内存管理区内的部分)。里面的for ( ; pfn < e_pfn; pfn++)则用于将空间区域的各页面通过free_highmem_page()进行释放处理,其中if (pfn_valid(pfn))用于判断页框的有效性,而pfn_to_page(pfn)则是将页框号转换为页面管理结构。
进一步分析free_highmem_page()实现:
-
【file:/mm/page_alloc.c】
-
void free_highmem_page(struct page *page)
-
{
-
__free_reserved_page(page);
-
totalram_pages++;
-
page_zone(page)->managed_pages++;
-
totalhigh_pages++;
-
}
其中totalram_pages用于记录内存的总页面数,page_zone(page)->managed_pages则是记录管理区的管理页面数,totalhigh_pages则是记录高端内存的页面总数。
具体看一下__free_reserved_page():
-
【file:/include/linux/mm.h】
-
/* Free the reserved page into the buddy system, so it gets managed. */
-
static inline void __free_reserved_page(struct page *page)
-
{
-
ClearPageReserved(page);
-
init_page_count(page);
-
__free_page(page);
-
}
其中ClearPageReserved定义在/include/linux/page-flags.h中:
#define CLEARPAGEFLAG(uname, lname) \
static inline void ClearPage##uname(struct page *page) \
{ clear_bit(PG_##lname, &page->flags); }
用于清除页面的flag中的reserved标志位,表示页面属于动态内存。
接着的init_page_count()则是设置页面的_count引用计数,将其设置为1,为__free_page()释放页面到内存管理算法中做准备。最后的__free_page()既是伙伴管理算法初始化时使用的函数,同时也是伙伴管理算法释放页面的操作函数。这里暂且搁置__free_page()的实现分析,后面再详细深入。
接着回到mem_init()里面,下一个调用的是free_all_bootmem():
-
【file:/mm/nobootmem.c】
-
unsigned long __init free_all_bootmem(void)
-
{
-
unsigned long pages;
-
-
reset_all_zones_managed_pages();
-
-
/*
-
* We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id
-
* because in some case like Node0 doesn't have RAM installed
-
* low ram will be on Node1
-
*/
-
pages = free_low_memory_core_early();
-
totalram_pages += pages;
-
-
return pages;
-
}
其中reset_all_zones_managed_pages()是用于重置管理区zone结构中的managed_pages成员数据,着重分析一下free_low_memory_core_early()实现:
-
【file:/mm/nobootmem.c】
-
static unsigned long __init free_low_memory_core_early(void)
-
{
-
unsigned long count = 0;
-
phys_addr_t start, end;
-
u64 i;
-
-
for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL)
-
count += __free_memory_core(start, end);
-
-
#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
-
{
-
phys_addr_t size;
-
-
/* Free memblock.reserved array if it was allocated */
-
size = get_allocated_memblock_reserved_regions_info(&start);
-
if (size)
-
count += __free_memory_core(start, start + size);
-
-
/* Free memblock.memory array if it was allocated */
-
size = get_allocated_memblock_memory_regions_info(&start);
-
if (size)
-
count += __free_memory_core(start, start + size);
-
}
-
#endif
-
-
return count;
-
}
该函数通过for_each_free_mem_range()遍历memblock算法中的空闲内存空间,并调用__free_memory_core()来释放;而后面的get_allocated_memblock_reserved_regions_info()和get_allocated_memblock_memory_regions_info()用于获取通过申请而得的memblock管理算法空间,然后释放,其中如果其算法管理空间是系统定义的memblock_reserved_init_regions和memblock_memory_init_regions则仍保留不予以释放。
最后着重分析一下__free_memory_core()最终调用的__free_pages_memory()的实现:
-
【file:/mm/nobootmem.c】
-
static void __init __free_pages_memory(unsigned long start, unsigned long end)
-
{
-
int order;
-
-
while (start < end) {
-
order = min(MAX_ORDER - 1UL, __ffs(start));
-
-
while (start + (1UL << order) > end)
-
order--;
-
-
__free_pages_bootmem(pfn_to_page(start), order);
-
-
start += (1UL << order);
-
}
-
}
其里面调用的__free_pages_bootmem()实现如下:
-
【file:/mm/page_alloc.c】
-
void __init __free_pages_bootmem(struct page *page, unsigned int order)
-
{
-
unsigned int nr_pages = 1 << order;
-
struct page *p = page;
-
unsigned int loop;
-
-
prefetchw(p);
-
for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
-
prefetchw(p + 1);
-
__ClearPageReserved(p);
-
set_page_count(p, 0);
-
}
-
__ClearPageReserved(p);
-
set_page_count(p, 0);
-
-
page_zone(page)->managed_pages += nr_pages;
-
set_page_refcounted(page);
-
__free_pages(page, order);
-
}
由此可以看到,其最终调用的还是__free_pages()将页面予以释放。该函数在后面集中进行分析。
至此,伙伴管理算法初始化完毕。
阅读(3188) | 评论(0) | 转发(1) |