Category: LINUX

2015-01-01 12:46:31

Continuing from the previous post, we analyze the final part of free_area_init_nodes(), namely the loop at its end:

    for_each_online_node(nid) {
        pg_data_t *pgdat = NODE_DATA(nid);

        free_area_init_node(nid, NULL,
                find_min_pfn_for_node(nid), NULL);

        /* Any memory on that node */
        if (pgdat->node_present_pages)
            node_set_state(nid, N_MEMORY);
        check_for_memory(pgdat, nid);
    }

The key function here is free_area_init_node(); the find_min_pfn_for_node() call that supplies one of its arguments returns the lowest page frame number (PFN) present on the node (a sketch follows below).
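For reference, find_min_pfn_for_node() is a small helper; a paraphrased sketch of the implementation in kernels of this vintage (not necessarily verbatim) simply takes the minimum start PFN over the memblock ranges registered for the node:

    /* Sketch: return the lowest registered PFN of node nid, or 0 if it has no memory */
    static unsigned long __init find_min_pfn_for_node(int nid)
    {
        unsigned long min_pfn = ULONG_MAX;
        unsigned long start_pfn;
        int i;

        /* Walk every memblock PFN range that belongs to this node */
        for_each_mem_pfn_range(i, nid, &start_pfn, NULL, NULL)
            min_pfn = min(min_pfn, start_pfn);

        if (min_pfn == ULONG_MAX) {
            printk(KERN_WARNING
                "Could not find start_pfn for node %d\n", nid);
            return 0;
        }

        return min_pfn;
    }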

The implementation of free_area_init_node():

【file:/mm/page_alloc.c】
    void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
            unsigned long node_start_pfn, unsigned long *zholes_size)
    {
        pg_data_t *pgdat = NODE_DATA(nid);
        unsigned long start_pfn = 0;
        unsigned long end_pfn = 0;

        /* pg_data_t should be reset to zero when it's allocated */
        WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);

        pgdat->node_id = nid;
        pgdat->node_start_pfn = node_start_pfn;
        init_zone_allows_reclaim(nid);
    #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
        get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
    #endif
        calculate_node_totalpages(pgdat, start_pfn, end_pfn,
                      zones_size, zholes_size);

        alloc_node_mem_map(pgdat);
    #ifdef CONFIG_FLAT_NODE_MEM_MAP
        printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
            nid, (unsigned long)pgdat,
            (unsigned long)pgdat->node_mem_map);
    #endif

        free_area_init_core(pgdat, start_pfn, end_pfn,
                    zones_size, zholes_size);
    }

In this function, init_zone_allows_reclaim() evaluates which other nodes this node may reclaim memory from, based on their NUMA distance; in a non-NUMA environment the function is empty. A sketch is shown below.
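A paraphrased sketch of init_zone_allows_reclaim() as it looks in kernels of this era (details may differ by version): every node whose NUMA distance from nid is within RECLAIM_DISTANCE is added to the node's reclaim_nodes mask, and the presence of any more distant node turns on zone_reclaim_mode:

    static void __init init_zone_allows_reclaim(int nid)
    {
        int i;

        /* Nodes close enough to nid may be reclaimed from directly */
        for_each_node_state(i, N_MEMORY)
            if (node_distance(nid, i) <= RECLAIM_DISTANCE)
                node_set(i, NODE_DATA(nid)->reclaim_nodes);
            else
                zone_reclaim_mode = 1;
    }

With CONFIG_HAVE_MEMBLOCK_NODE_MAP configured, the next call of interest is get_pfn_range_for_nid():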

【file:/mm/page_alloc.c】
    /**
     * get_pfn_range_for_nid - Return the start and end page frames for a node
     * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
     * @start_pfn: Passed by reference. On return, it will have the node start_pfn.
     * @end_pfn: Passed by reference. On return, it will have the node end_pfn.
     *
     * It returns the start and end page frame of a node based on information
     * provided by an arch calling add_active_range(). If called for a node
     * with no available memory, a warning is printed and the start and end
     * PFNs will be 0.
     */
    void __meminit get_pfn_range_for_nid(unsigned int nid,
                unsigned long *start_pfn, unsigned long *end_pfn)
    {
        unsigned long this_start_pfn, this_end_pfn;
        int i;

        *start_pfn = -1UL;
        *end_pfn = 0;

        for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) {
            *start_pfn = min(*start_pfn, this_start_pfn);
            *end_pfn = max(*end_pfn, this_end_pfn);
        }

        if (*start_pfn == -1UL)
            *start_pfn = 0;
    }

This function returns the node's start and end page frame numbers, which are then handed to calculate_node_totalpages().
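As a purely illustrative example (hypothetical numbers): if the node registered two memblock ranges covering PFNs [16, 159) and [256, 229376), the loop takes the minimum of the starts and the maximum of the ends, so *start_pfn becomes 16 and *end_pfn becomes 229376, with the hole in between accounted for later; a node that registered no ranges would leave *start_pfn at -1UL, and the final check resets it to 0.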

The implementation of calculate_node_totalpages():

【file:/mm/page_alloc.c】
    static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
                            unsigned long node_start_pfn,
                            unsigned long node_end_pfn,
                            unsigned long *zones_size,
                            unsigned long *zholes_size)
    {
        unsigned long realtotalpages, totalpages = 0;
        enum zone_type i;

        for (i = 0; i < MAX_NR_ZONES; i++)
            totalpages += zone_spanned_pages_in_node(pgdat->node_id, i,
                                 node_start_pfn,
                                 node_end_pfn,
                                 zones_size);
        pgdat->node_spanned_pages = totalpages;

        realtotalpages = totalpages;
        for (i = 0; i < MAX_NR_ZONES; i++)
            realtotalpages -=
                zone_absent_pages_in_node(pgdat->node_id, i,
                              node_start_pfn, node_end_pfn,
                              zholes_size);
        pgdat->node_present_pages = realtotalpages;
        printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
                                realtotalpages);
    }

Of these, zone_spanned_pages_in_node():

【file:/mm/page_alloc.c】
    /*
     * Return the number of pages a zone spans in a node, including holes
     * present_pages = zone_spanned_pages_in_node() - zone_absent_pages_in_node()
     */
    static unsigned long __meminit zone_spanned_pages_in_node(int nid,
                        unsigned long zone_type,
                        unsigned long node_start_pfn,
                        unsigned long node_end_pfn,
                        unsigned long *ignored)
    {
        unsigned long zone_start_pfn, zone_end_pfn;

        /* Get the start and end of the zone */
        zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
        zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
        adjust_zone_range_for_zone_movable(nid, zone_type,
                    node_start_pfn, node_end_pfn,
                    &zone_start_pfn, &zone_end_pfn);

        /* Check that this node has pages within the zone's required range */
        if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
            return 0;

        /* Move the zone boundaries inside the node if necessary */
        zone_end_pfn = min(zone_end_pfn, node_end_pfn);
        zone_start_pfn = max(zone_start_pfn, node_start_pfn);

        /* Return the spanned pages */
        return zone_end_pfn - zone_start_pfn;
    }

This counts the span of pages a given zone covers within the node. The span excludes the ZONE_MOVABLE part; the adjust_zone_range_for_zone_movable() call inside is what carves the movable portion out of the zone's range.
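A hypothetical example with made-up numbers: suppose arch_zone_lowest/highest_possible_pfn[] give ZONE_NORMAL the range [4096, 229376) and the node spans PFNs [16, 131072). After clamping the zone boundaries into the node, the spanned count is min(229376, 131072) - max(4096, 16) = 131072 - 4096 = 126976 pages; a zone whose range does not intersect the node at all returns 0.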

And its counterpart, zone_absent_pages_in_node():

【file:/mm/page_alloc.c】
    /* Return the number of page frames in holes in a zone on a node */
    static unsigned long __meminit zone_absent_pages_in_node(int nid,
                        unsigned long zone_type,
                        unsigned long node_start_pfn,
                        unsigned long node_end_pfn,
                        unsigned long *ignored)
    {
        unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
        unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
        unsigned long zone_start_pfn, zone_end_pfn;

        zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
        zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);

        adjust_zone_range_for_zone_movable(nid, zone_type,
                node_start_pfn, node_end_pfn,
                &zone_start_pfn, &zone_end_pfn);
        return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
    }

This function counts the page frames that fall into memory holes within the zone. With spanned and absent pages both known, calculate_node_totalpages() obtains the node's total of present physical pages and prints it to the kernel log.
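The hole counting itself is delegated to __absent_pages_in_range(); a paraphrased sketch of the implementation in kernels of this era (not necessarily verbatim) starts from the full span and subtracts every registered memblock range that intersects it:

    /* Sketch: number of absent (hole) page frames in [range_start_pfn, range_end_pfn) */
    unsigned long __meminit __absent_pages_in_range(int nid,
                    unsigned long range_start_pfn,
                    unsigned long range_end_pfn)
    {
        unsigned long nr_absent = range_end_pfn - range_start_pfn;
        unsigned long start_pfn, end_pfn;
        int i;

        /* Subtract every present memblock range, clipped to the queried span */
        for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
            start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
            end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
            nr_absent -= end_pfn - start_pfn;
        }
        return nr_absent;
    }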

Next, free_area_init_node() calls alloc_node_mem_map():

【file:/mm/page_alloc.c】
    static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
    {
        /* Skip empty nodes */
        if (!pgdat->node_spanned_pages)
            return;

    #ifdef CONFIG_FLAT_NODE_MEM_MAP
        /* ia64 gets its own node_mem_map, before this, without bootmem */
        if (!pgdat->node_mem_map) {
            unsigned long size, start, end;
            struct page *map;

            /*
             * The zone's endpoints aren't required to be MAX_ORDER
             * aligned but the node_mem_map endpoints must be in order
             * for the buddy allocator to function correctly.
             */
            start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
            end = pgdat_end_pfn(pgdat);
            end = ALIGN(end, MAX_ORDER_NR_PAGES);
            size = (end - start) * sizeof(struct page);
            map = alloc_remap(pgdat->node_id, size);
            if (!map)
                map = memblock_virt_alloc_node_nopanic(size,
                                       pgdat->node_id);
            pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
        }
    #ifndef CONFIG_NEED_MULTIPLE_NODES
        /*
         * With no DISCONTIG, the global mem_map is just set as node 0's
         */
        if (pgdat == NODE_DATA(0)) {
            mem_map = NODE_DATA(0)->node_mem_map;
    #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
            if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
                mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
    #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
        }
    #endif
    #endif /* CONFIG_FLAT_NODE_MEM_MAP */
    }

Based on the page counts gathered by calculate_node_totalpages(), this allocates the node's mem_map, i.e. the array of struct page describing every page frame spanned by the node.
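To get a feel for the sizes involved (illustrative figures only, assuming 4 KiB pages and sizeof(struct page) = 64 bytes as on x86-64): a node spanning 1 GiB covers 262144 page frames, so its mem_map takes roughly 262144 * 64 bytes = 16 MiB, about 1/64 of the memory it describes. Note that start is first rounded down to a MAX_ORDER_NR_PAGES boundary and end rounded up, so the actual allocation can be slightly larger than the spanned range alone would suggest.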

Once that memory is in place, the initialization work is handed over to free_area_init_core():

【file:/mm/page_alloc.c】
    /*
     * Set up the zone data structures:
     * - mark all pages reserved
     * - mark all memory queues empty
     * - clear the memory bitmaps
     *
     * NOTE: pgdat should get zeroed by caller.
     */
    static void __paginginit free_area_init_core(struct pglist_data *pgdat,
            unsigned long node_start_pfn, unsigned long node_end_pfn,
            unsigned long *zones_size, unsigned long *zholes_size)
    {
        enum zone_type j;
        int nid = pgdat->node_id;
        unsigned long zone_start_pfn = pgdat->node_start_pfn;
        int ret;

        pgdat_resize_init(pgdat);
    #ifdef CONFIG_NUMA_BALANCING
        spin_lock_init(&pgdat->numabalancing_migrate_lock);
        pgdat->numabalancing_migrate_nr_pages = 0;
        pgdat->numabalancing_migrate_next_window = jiffies;
    #endif
        init_waitqueue_head(&pgdat->kswapd_wait);
        init_waitqueue_head(&pgdat->pfmemalloc_wait);
        pgdat_page_cgroup_init(pgdat);

        for (j = 0; j < MAX_NR_ZONES; j++) {
            struct zone *zone = pgdat->node_zones + j;
            unsigned long size, realsize, freesize, memmap_pages;

            size = zone_spanned_pages_in_node(nid, j, node_start_pfn,
                              node_end_pfn, zones_size);
            realsize = freesize = size - zone_absent_pages_in_node(nid, j,
                                    node_start_pfn,
                                    node_end_pfn,
                                    zholes_size);

            /*
             * Adjust freesize so that it accounts for how much memory
             * is used by this zone for memmap. This affects the watermark
             * and per-cpu initialisations
             */
            memmap_pages = calc_memmap_size(size, realsize);
            if (freesize >= memmap_pages) {
                freesize -= memmap_pages;
                if (memmap_pages)
                    printk(KERN_DEBUG
                           " %s zone: %lu pages used for memmap\n",
                           zone_names[j], memmap_pages);
            } else
                printk(KERN_WARNING
                    " %s zone: %lu pages exceeds freesize %lu\n",
                    zone_names[j], memmap_pages, freesize);

            /* Account for reserved pages */
            if (j == 0 && freesize > dma_reserve) {
                freesize -= dma_reserve;
                printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
                        zone_names[0], dma_reserve);
            }

            if (!is_highmem_idx(j))
                nr_kernel_pages += freesize;
            /* Charge for highmem memmap if there are enough kernel pages */
            else if (nr_kernel_pages > memmap_pages * 2)
                nr_kernel_pages -= memmap_pages;
            nr_all_pages += freesize;

            zone->spanned_pages = size;
            zone->present_pages = realsize;
            /*
             * Set an approximate value for lowmem here, it will be adjusted
             * when the bootmem allocator frees pages into the buddy system.
             * And all highmem pages will be managed by the buddy system.
             */
            zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
    #ifdef CONFIG_NUMA
            zone->node = nid;
            zone->min_unmapped_pages = (freesize*sysctl_min_unmapped_ratio)
                            / 100;
            zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100;
    #endif
            zone->name = zone_names[j];
            spin_lock_init(&zone->lock);
            spin_lock_init(&zone->lru_lock);
            zone_seqlock_init(zone);
            zone->zone_pgdat = pgdat;
            zone_pcp_init(zone);

            /* For bootup, initialized properly in watermark setup */
            mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages);

            lruvec_init(&zone->lruvec);
            if (!size)
                continue;

            set_pageblock_order();
            setup_usemap(pgdat, zone, zone_start_pfn, size);
            ret = init_currently_empty_zone(zone, zone_start_pfn,
                            size, MEMMAP_EARLY);
            BUG_ON(ret);
            memmap_init(size, nid, j, zone_start_pfn);
            zone_start_pfn += size;
        }
    }

This function sets up the node's management structures: pgdat_resize_init() initializes the node's resize lock, init_waitqueue_head() initializes the kswapd and pfmemalloc wait queues, and pgdat_page_cgroup_init() performs the page-cgroup initialization.

Inside the for loop, each zone's spanned page count size and its real page count realsize (the span minus memory holes) are computed, and calc_memmap_size() (sketched below) works out memmap_pages, the number of pages the zone's slice of mem_map will occupy. From these, nr_kernel_pages accumulates the pages outside high memory (accounting for all directly mapped pages) and nr_all_pages accumulates the free pages of every zone. The rest of the loop body initializes the zone's management structure: the various locks and queues, zone_pcp_init() which sets up the per-CPU (hot/cold) page sets, mod_zone_page_state() which updates the zone's state statistics, and lruvec_init() which initializes the lists and lock used by the LRU algorithm. set_pageblock_order() sets pageblock_order under the CONFIG_HUGETLB_PAGE_SIZE_VARIABLE configuration; setup_usemap() allocates the memory behind the zone's pageblock_flags, which the buddy system's anti-fragmentation (migration) logic relies on; and init_currently_empty_zone() initializes the zone's wait-queue hash table and wait queues as well as the free_area lists used by the buddy system.
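For completeness, a paraphrased sketch of calc_memmap_size() as found in kernels of this era (details may vary): it estimates how many page frames the zone's part of mem_map occupies, using present_pages instead of spanned_pages when SPARSEMEM is enabled and the zone contains large holes:

    static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
                            unsigned long present_pages)
    {
        unsigned long pages = spanned_pages;

        /*
         * With SPARSEMEM the memmap is only built for populated regions,
         * so a zone riddled with holes is better estimated by its
         * present pages than by its full span.
         */
        if (spanned_pages > present_pages + (present_pages >> 4) &&
            IS_ENABLED(CONFIG_SPARSEMEM))
            pages = present_pages;

        return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
    }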

Some of the log messages produced along the way can be seen with dmesg:
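The exact figures depend on the machine; an illustrative excerpt (hypothetical values, following the printk format strings in the code above) might look like:

    On node 0 totalpages: 229359
    free_area_init_node: node 0, pgdat c1a1b700, node_mem_map e6ffa200
     DMA zone: 32 pages used for memmap
     DMA zone: 0 pages reserved
     Normal zone: 1726 pages used for memmap
     HighMem zone: 380 pages used for memmap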

At the end of free_area_init_core(), let us look more closely at memmap_init():

【file:/mm/page_alloc.c】
    #define memmap_init(size, nid, zone, start_pfn) \
        memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)

which expands to memmap_init_zone():

【file:/mm/page_alloc.c】
    /*
     * Initially all pages are reserved - free ones are freed
     * up by free_all_bootmem() once the early boot process is
     * done. Non-atomic initialization, single-pass.
     */
    void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
            unsigned long start_pfn, enum memmap_context context)
    {
        struct page *page;
        unsigned long end_pfn = start_pfn + size;
        unsigned long pfn;
        struct zone *z;

        if (highest_memmap_pfn < end_pfn - 1)
            highest_memmap_pfn = end_pfn - 1;

        z = &NODE_DATA(nid)->node_zones[zone];
        for (pfn = start_pfn; pfn < end_pfn; pfn++) {
            /*
             * There can be holes in boot-time mem_map[]s
             * handed to this function. They do not
             * exist on hotplugged memory.
             */
            if (context == MEMMAP_EARLY) {
                if (!early_pfn_valid(pfn))
                    continue;
                if (!early_pfn_in_nid(pfn, nid))
                    continue;
            }
            page = pfn_to_page(pfn);
            set_page_links(page, zone, nid, pfn);
            mminit_verify_page_links(page, zone, nid, pfn);
            init_page_count(page);
            page_mapcount_reset(page);
            page_cpupid_reset_last(page);
            SetPageReserved(page);
            /*
             * Mark the block movable so that blocks are reserved for
             * movable at startup. This will force kernel allocations
             * to reserve their blocks rather than leaking throughout
             * the address space during boot when many long-lived
             * kernel allocations are made. Later some blocks near
             * the start are marked MIGRATE_RESERVE by
             * setup_zone_migrate_reserve()
             *
             * bitmap is created for zone's valid pfn range. but memmap
             * can be created for invalid pages (for alignment)
             * check here not to call set_pageblock_migratetype() against
             * pfn out of zone.
             */
            if ((z->zone_start_pfn <= pfn)
                && (pfn < zone_end_pfn(z))
                && !(pfn & (pageblock_nr_pages - 1)))
                set_pageblock_migratetype(page, MIGRATE_MOVABLE);

            INIT_LIST_HEAD(&page->lru);
    #ifdef WANT_PAGE_VIRTUAL
            /* The shift won't overflow because ZONE_NORMAL is below 4G. */
            if (!is_highmem_idx(zone))
                set_page_address(page, __va(pfn << PAGE_SHIFT));
    #endif
        }
    }

This function looks up the struct page for each page frame number via pfn_to_page(), and the subsequent operations initialize that page descriptor: linking it to its zone and node, setting the initial reference count, resetting the map count, marking it reserved, and marking the first page of each pageblock MIGRATE_MOVABLE.
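Of these, set_page_links() records which zone and node (and, with sparse memory models, which section) the page belongs to by encoding them into page->flags; a paraphrased sketch of the helper from include/linux/mm.h in kernels of this era:

    static inline void set_page_links(struct page *page, enum zone_type zone,
        unsigned long node, unsigned long pfn)
    {
        set_page_zone(page, zone);
        set_page_node(page, node);
    #ifdef SECTION_IN_PAGE_FLAGS
        set_page_section(page, pfn_to_section_nr(pfn));
    #endif
    }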

At this point, the initialization performed by free_area_init_node() is complete. As the preceding analysis shows, it initializes the whole Linux physical memory management framework: the management node (pg_data_t), the zones, and the per-page descriptors (struct page).

Back to the loop we started with: the last two functions in its body are node_set_state() and check_for_memory(). node_set_state() sets the node's state (here marking it N_MEMORY when it has present pages), while check_for_memory() checks what kind of memory the node actually has; a sketch of it is given below.
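A paraphrased sketch of check_for_memory() in kernels of this era (details may differ by version): it walks the node's zones below ZONE_MOVABLE and, if any is populated, additionally marks the node N_HIGH_MEMORY and, for zones up to ZONE_NORMAL, N_NORMAL_MEMORY:

    static void check_for_memory(pg_data_t *pgdat, int nid)
    {
        enum zone_type zone_type;

        /* Nothing to refine when all memory states collapse into one */
        if (N_MEMORY == N_NORMAL_MEMORY)
            return;

        for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) {
            struct zone *zone = &pgdat->node_zones[zone_type];
            if (populated_zone(zone)) {
                node_set_state(nid, N_HIGH_MEMORY);
                if (N_NORMAL_MEMORY != N_HIGH_MEMORY &&
                    zone_type <= ZONE_NORMAL)
                    node_set_state(nid, N_NORMAL_MEMORY);
                break;
            }
        }
    }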

With that, the construction of the memory management framework is complete.
