Chinaunix首页 | 论坛 | 博客
  • 博客访问: 294186
  • 博文数量: 44
  • 博客积分: 10
  • 博客等级: 民兵
  • 技术积分: 1354
  • 用 户 组: 普通用户
  • 注册时间: 2012-04-08 15:38
个人简介

人生像是在跑马拉松,能够完赛的都是不断地坚持向前迈进;人生就是像在跑马拉松,不断调整步伐,把握好分分秒秒;人生还是像在跑马拉松,能力决定了能跑短程、半程还是全程。人生其实就是一场马拉松,坚持不懈,珍惜时间。

文章分类

分类: LINUX

2015-03-08 23:09:46

前面已经分析了linux内存管理算法(伙伴管理算法)的准备工作。

具体的算法初始化则回到start_kernel()函数接着往下走,下一个函数是mm_init():

  1. 【file:/init/main.c】
  2. /*
  3.  * Set up kernel memory allocators
  4.  */
  5. static void __init mm_init(void)
  6. {
  7.     /*
  8.      * page_cgroup requires contiguous pages,
  9.      * bigger than MAX_ORDER unless SPARSEMEM.
  10.      */
  11.     page_cgroup_init_flatmem();
  12.     mem_init();
  13.     kmem_cache_init();
  14.     percpu_init_late();
  15.     pgtable_init();
  16.     vmalloc_init();
  17. }

乍看仅仅是几个函数的调用,实际上这里的事情远远没这么简单。其中page_cgroup_init_flatmem()与cgroup相关,而mem_init()则是伙伴管理算法的初始化,此外kmem_cache_init()是用于内核slub内存分配体系的初始化,而vmalloc_init()则是用于vmalloc的初始化。

当前主要分析伙伴管理算法,则仅对mem_init()做专门的分析,其余的暂且后面再分析。

伙伴管理算法的初始化函数入口是mem_init(),其实现:

  1. 【file:/arch/x86/mm/init_32.c】
  2. void __init mem_init(void)
  3. {
  4.     pci_iommu_alloc();
  5.  
  6. #ifdef CONFIG_FLATMEM
  7.     BUG_ON(!mem_map);
  8. #endif
  9.     /*
  10.      * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to
  11.      * be done before free_all_bootmem(). Memblock use free low memory for
  12.      * temporary data (see find_range_array()) and for this purpose can use
  13.      * pages that was already passed to the buddy allocator, hence marked as
  14.      * not accessible in the page tables when compiled with
  15.      * CONFIG_DEBUG_PAGEALLOC. Otherwise order of initialization is not
  16.      * important here.
  17.      */
  18.     set_highmem_pages_init();
  19.  
  20.     /* this will put all low memory onto the freelists */
  21.     free_all_bootmem();
  22.  
  23.     after_bootmem = 1;
  24.  
  25.     mem_init_print_info(NULL);
  26.     printk(KERN_INFO "virtual kernel memory layout:\n"
  27.         " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
  28. #ifdef CONFIG_HIGHMEM
  29.         " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
  30. #endif
  31.         " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
  32.         " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
  33.         " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
  34.         " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
  35.         " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
  36.         FIXADDR_START, FIXADDR_TOP,
  37.         (FIXADDR_TOP - FIXADDR_START) >> 10,
  38.  
  39. #ifdef CONFIG_HIGHMEM
  40.         PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
  41.         (LAST_PKMAP*PAGE_SIZE) >> 10,
  42. #endif
  43.  
  44.         VMALLOC_START, VMALLOC_END,
  45.         (VMALLOC_END - VMALLOC_START) >> 20,
  46.  
  47.         (unsigned long)__va(0), (unsigned long)high_memory,
  48.         ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
  49.  
  50.         (unsigned long)&__init_begin, (unsigned long)&__init_end,
  51.         ((unsigned long)&__init_end -
  52.          (unsigned long)&__init_begin) >> 10,
  53.  
  54.         (unsigned long)&_etext, (unsigned long)&_edata,
  55.         ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
  56.  
  57.         (unsigned long)&_text, (unsigned long)&_etext,
  58.         ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
  59.  
  60.     /*
  61.      * Check boundaries twice: Some fundamental inconsistencies can
  62.      * be detected at build time already.
  63.      */
  64. #define __FIXADDR_TOP (-PAGE_SIZE)
  65. #ifdef CONFIG_HIGHMEM
  66.     BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
  67.     BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE);
  68. #endif
  69. #define high_memory (-128UL << 20)
  70.     BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
  71. #undef high_memory
  72. #undef __FIXADDR_TOP
  73. #ifdef CONFIG_RANDOMIZE_BASE
  74.     BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE);
  75. #endif
  76.  
  77. #ifdef CONFIG_HIGHMEM
  78.     BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
  79.     BUG_ON(VMALLOC_END > PKMAP_BASE);
  80. #endif
  81.     BUG_ON(VMALLOC_START >= VMALLOC_END);
  82.     BUG_ON((unsigned long)high_memory > VMALLOC_START);
  83.  
  84.     if (boot_cpu_data.wp_works_ok < 0)
  85.         test_wp_bit();
  86. }

其中pci_iommu_alloc()不是伙伴算法重点相关的函数,不过还是稍微记录一下:

  1. 【file:/arch/x86/kernel/pci-dma.c】
  2. void __init pci_iommu_alloc(void)
  3. {
  4.     struct iommu_table_entry *p;
  5.  
  6.     sort_iommu_table(__iommu_table, __iommu_table_end);
  7.     check_iommu_entries(__iommu_table, __iommu_table_end);
  8.  
  9.     for (p = __iommu_table; p < __iommu_table_end; p++) {
  10.         if (p && p->detect && p->detect() > 0) {
  11.             p->flags |= IOMMU_DETECTED;
  12.             if (p->early_init)
  13.                 p->early_init();
  14.             if (p->flags & IOMMU_FINISH_IF_DETECTED)
  15.                 break;
  16.         }
  17.     }
  18. }

该函数主要是将iommu table先行排序检查,然后调用各个表项注册的函数进行初始化。

而接着的set_highmem_pages_init()则是伙伴算法的开始:

  1. 【file:/arch/x86/mm/highmem_32.c】
  2. void __init set_highmem_pages_init(void)
  3. {
  4.     struct zone *zone;
  5.     int nid;
  6.  
  7.     /*
  8.      * Explicitly reset zone->managed_pages because set_highmem_pages_init()
  9.      * is invoked before free_all_bootmem()
  10.      */
  11.     reset_all_zones_managed_pages();
  12.     for_each_zone(zone) {
  13.         unsigned long zone_start_pfn, zone_end_pfn;
  14.  
  15.         if (!is_highmem(zone))
  16.             continue;
  17.  
  18.         zone_start_pfn = zone->zone_start_pfn;
  19.         zone_end_pfn = zone_start_pfn + zone->spanned_pages;
  20.  
  21.         nid = zone_to_nid(zone);
  22.         printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n",
  23.                 zone->name, nid, zone_start_pfn, zone_end_pfn);
  24.  
  25.         add_highpages_with_active_regions(nid, zone_start_pfn,
  26.                  zone_end_pfn);
  27.     }
  28. }

该函数中reset_all_zones_managed_pages()主要是将所有的内存管理区zone的页面管理数据进行清0重置。而接下来的for_each_zone(zone)循环体结合is_highmem(zone)判断则是用于遍历查找出高端内存的管理区,对查找到的高端内存管理区则调用add_highpages_with_active_regions()将其释放添加至伙伴管理算法中。

add_highpages_with_active_regions()具体实现:

  1. 【file:/arch/x86/mm/init_32.c】
  2. void __init add_highpages_with_active_regions(int nid,
  3.              unsigned long start_pfn, unsigned long end_pfn)
  4. {
  5.     phys_addr_t start, end;
  6.     u64 i;
  7.  
  8.     for_each_free_mem_range(i, nid, &start, &end, NULL) {
  9.         unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
  10.                         start_pfn, end_pfn);
  11.         unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
  12.                           start_pfn, end_pfn);
  13.         for ( ; pfn < e_pfn; pfn++)
  14.             if (pfn_valid(pfn))
  15.                 free_highmem_page(pfn_to_page(pfn));
  16.     }
  17. }

其中for_each_free_mem_range(i, nid, &start, &end, NULL)用于遍历查找memblock算法中空闲的空间区域,然后通过clamp_t()将该区域的起止页框号限制在[start_pfn, end_pfn]范围之内,即只保留落在该管理区内的部分。里面的for ( ; pfn < e_pfn; pfn++)则用于将空间区域的各页面通过free_highmem_page()进行释放处理,其中if (pfn_valid(pfn))用于判断页面的有效性,而pfn_to_page(pfn)则是将页框号转换为页面管理结构。

进一步分析free_highmem_page()实现:

  1. 【file:/mm/page_alloc.c】
  2. void free_highmem_page(struct page *page)
  3. {
  4.     __free_reserved_page(page);
  5.     totalram_pages++;
  6.     page_zone(page)->managed_pages++;
  7.     totalhigh_pages++;
  8. }

其中totalram_pages用于记录内存的总页面数,page_zone(page)->managed_pages则是记录管理区的管理页面数,totalhigh_pages则是记录高端内存的页面总数。

具体看一下__free_reserved_page():

  1. 【file:/include/linux/mm.h】
  2. /* Free the reserved page into the buddy system, so it gets managed. */
  3. static inline void __free_reserved_page(struct page *page)
  4. {
  5.     ClearPageReserved(page);
  6.     init_page_count(page);
  7.     __free_page(page);
  8. }

其中ClearPageReserved定义在/include/linux/page-flags.h中:

#define CLEARPAGEFLAG(uname, lname)                 \
static inline void ClearPage##uname(struct page *page)          \
                                    { clear_bit(PG_##lname, &page->flags); }

用于清除页面的flag中的reserved标志位,表示页面属于动态内存。

接着的init_page_count()则是设置页面的_count引用计数,将其设置为1,用于为__free_page()释放页面到内存管理算法中做准备。最后的__free_page(),该函数既用于初始化伙伴管理算法,同时也是伙伴管理算法释放页面的操作函数。暂且搁置分析__free_page()的实现,后面再详细深入。

接着回到mem_init()里面,下一个调用的是free_all_bootmem():

  1. 【file:/mm/nobootmem.c】
  2. unsigned long __init free_all_bootmem(void)
  3. {
  4.     unsigned long pages;
  5.  
  6.     reset_all_zones_managed_pages();
  7.  
  8.     /*
  9.      * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id
  10.      * because in some case like Node0 doesn't have RAM installed
  11.      * low ram will be on Node1
  12.      */
  13.     pages = free_low_memory_core_early();
  14.     totalram_pages += pages;
  15.  
  16.     return pages;
  17. }

    其中reset_all_zones_managed_pages()是用于重置管理区zone结构中的managed_pages成员数据,着重分析一下free_low_memory_core_early()实现:

  1. 【file:/mm/nobootmem.c】
  2. static unsigned long __init free_low_memory_core_early(void)
  3. {
  4.     unsigned long count = 0;
  5.     phys_addr_t start, end;
  6.     u64 i;
  7.  
  8.     for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL)
  9.         count += __free_memory_core(start, end);
  10.  
  11. #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
  12.     {
  13.         phys_addr_t size;
  14.  
  15.         /* Free memblock.reserved array if it was allocated */
  16.         size = get_allocated_memblock_reserved_regions_info(&start);
  17.         if (size)
  18.             count += __free_memory_core(start, start + size);
  19.  
  20.         /* Free memblock.memory array if it was allocated */
  21.         size = get_allocated_memblock_memory_regions_info(&start);
  22.         if (size)
  23.             count += __free_memory_core(start, start + size);
  24.     }
  25. #endif
  26.  
  27.     return count;
  28. }

该函数通过for_each_free_mem_range()遍历memblock算法中的空闲内存空间,并调用__free_memory_core()来释放;而后面的get_allocated_memblock_reserved_regions_info()和get_allocated_memblock_memory_regions_info()用于获取通过申请而得的memblock管理算法空间,然后释放,其中如果其算法管理空间是系统定义的memblock_reserved_init_regions和memblock_memory_init_regions则仍保留不予以释放。

最后着重分析一下__free_memory_core()的实现,其核心是它所调用的__free_pages_memory():

  1. 【file:/mm/nobootmem.c】
  2. static void __init __free_pages_memory(unsigned long start, unsigned long end)
  3. {
  4.     int order;
  5.  
  6.     while (start < end) {
  7.         order = min(MAX_ORDER - 1UL, __ffs(start));
  8.  
  9.         while (start + (1UL << order) > end)
  10.             order--;
  11.  
  12.         __free_pages_bootmem(pfn_to_page(start), order);
  13.  
  14.         start += (1UL << order);
  15.     }
  16. }

    其里面调用的__free_pages_bootmem()实现如下:

  1. 【file:/mm/page_alloc.c】
  2. void __init __free_pages_bootmem(struct page *page, unsigned int order)
  3. {
  4.     unsigned int nr_pages = 1 << order;
  5.     struct page *p = page;
  6.     unsigned int loop;
  7.  
  8.     prefetchw(p);
  9.     for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
  10.         prefetchw(p + 1);
  11.         __ClearPageReserved(p);
  12.         set_page_count(p, 0);
  13.     }
  14.     __ClearPageReserved(p);
  15.     set_page_count(p, 0);
  16.  
  17.     page_zone(page)->managed_pages += nr_pages;
  18.     set_page_refcounted(page);
  19.     __free_pages(page, order);
  20. }

   由此可以看到,其最终调用的还是__free_pages()将页面予以释放。该函数在后面集中进行分析。

   至此,伙伴管理算法初始化完毕。

阅读(3171) | 评论(0) | 转发(1) |
给主人留下些什么吧!~~