Chinaunix首页 | 论坛 | 博客
  • 博客访问: 2159649
  • 博文数量: 438
  • 博客积分: 3871
  • 博客等级: 中校
  • 技术积分: 6075
  • 用 户 组: 普通用户
  • 注册时间: 2011-09-10 00:11
个人简介

邮箱: wangcong02345@163.com

文章分类

全部博文(438)

文章存档

2017年(15)

2016年(119)

2015年(91)

2014年(62)

2013年(56)

2012年(79)

2011年(16)

分类: LINUX

2016-11-11 16:56:36

一. 总体说明
1.1 页表的初始化都是在paging_init中
start_kernel --> setup_arch
--> paging_init
{
    a.free_area_init_core中在物理地址16M(0x1000000,对应虚拟地址0xC1000000)处,建立了mem_map,用page结构体去管理内存
     a.1 刚开始时设置page_count=0
     a.2 设置flag=reserved
     a.3 初始化zone结构体,使mem_map中virtual都指向相应的虚拟地址
    b.free_all_bootmem_core
}

二.代码分析
在arch/i386/mm/init.c中-->start_kernel-->setup_arch-->paging_init
  1. void __init paging_init(void)
  2. {
  3.     pagetable_init();     //2.1设置页目录表
  4. //将swapper_pg_dir(虚拟地址0xc0101000)对应的物理地址__pa(swapper_pg_dir)写入cr3,告诉cpu这个是页目录表的基地址
  5.     __asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir)));

  6. #if CONFIG_X86_PAE
  7.     /*
  8.      * We will bail out later - printk doesnt work right now so
  9.      * the user would just see a hanging kernel.
  10.      */
  11.     if (cpu_has_pae)
  12.         set_in_cr4(X86_CR4_PAE);
  13. #endif
  14. //flush一下使修改后的页目录表生效
  15.     __flush_tlb_all();

  16. #ifdef CONFIG_HIGHMEM
  17.     kmap_init();      //初始化几个变量kmap_vstart=0xffff5000,kmap_pte=0xc0003fd4,kmap_prot=0x163
  18. #endif
  19. //初始化三个管理区的size
  20.     {
  21.         unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};     //MAX_NR_ZONES=3,DMA,NORMAL,HIGH
  22.         unsigned int max_dma, high, low;

  23.         max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; //MAX_DMA_ADDRESS=16M,取物理地址的页帧=0x1000
  24.         low = max_low_pfn;                             //low=896M的页帧=0x38000
  25.         high = highend_pfn;                            //high=实际内存的页帧=我这儿给qemu了1G内存=0x3fffe

  26.         if (low < max_dma)
  27.             zones_size[ZONE_DMA] = low;
  28.         else {
  29.             zones_size[ZONE_DMA] = max_dma;
  30.             zones_size[ZONE_NORMAL] = low - max_dma;
  31. #ifdef CONFIG_HIGHMEM
  32.             zones_size[ZONE_HIGHMEM] = high - low;      //zones_size={0x1000, 0x37000, 0x7ffe},都是用size表示的
  33. #endif
  34.         }
  35.         free_area_init(zones_size);                //初始化各个管理区
  36.     }
  37.     return;
  38. }

2.1.3最后mm/page_alloc.c中-->paging_init-->free_area_init-->free_area_init_core
函数的作用是:初始化zone结构体,使mem_map中virtual都指向相应的虚拟地址

  1. /*
  2.  * Set up the zone data structures:
  3.  * - mark all pages reserved
  4.  * - mark all memory queues empty
  5.  * - clear the memory bitmaps
  6.  */
  7. //zones_size={0x1000, 0x37000, 0x7ffe},都是用size表示的
  8. void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
  9.     unsigned long *zones_size, unsigned long zone_start_paddr, 
  10.     unsigned long *zholes_size, struct page *lmem_map)
  11. {
  12.     struct page *p;
  13.     unsigned long i, j;
  14.     unsigned long map_size;
  15.     unsigned long totalpages, offset, realtotalpages;
  16.     const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);

  17.     if (zone_start_paddr & ~PAGE_MASK)
  18.         BUG();
  19.     //计算DMA+NORMAL+HIGHMEM中所有的页帧数=0x1000+0x37000+0x7FFE=0x3fffe
  20.     totalpages = 0;
  21.     for (i = 0; i < MAX_NR_ZONES; i++) {
  22.         unsigned long size = zones_size[i];
  23.         totalpages += size;
  24.     }
  25.     realtotalpages = totalpages;
  26.     if (zholes_size)              //如果有holes(洞),则去掉holes的部分。我这儿没有holes
  27.         for (i = 0; i < MAX_NR_ZONES; i++)
  28.             realtotalpages -= zholes_size[i];
  29.             
  30.     printk("On node %d totalpages: %lu\n", nid, realtotalpages);

  31.     INIT_LIST_HEAD(&active_list);
  32.     INIT_LIST_HEAD(&inactive_list);

  33.     /*
  34.      * Some architectures (with lots of mem and discontinous memory
  35.      * maps) have to search for a good mem_map area:
  36.      * For discontigmem, the conceptual mem map array starts from 
  37.      * PAGE_OFFSET, we need to align the actual array onto a mem map 
  38.      * boundary, so that MAP_NR works.
  39.      */
  40. //执行后map_size=0x3FFFF*68=262143*68=17825724=0x10fffbc=16.995M的内存
  41. //需要分配的内存是0x10fffbc/4096=4351.98页内存
  42.     map_size = (totalpages + 1)*sizeof(struct page);
  43.     if (lmem_map == (struct page *)0) {
  44.         lmem_map = (struct page *) alloc_bootmem_node(pgdat, map_size);   //执行后lmem_map=0xc1000000,从16M开始分配544*8页内存
  45.         lmem_map = (struct page *)(PAGE_OFFSET + MAP_ALIGN((unsigned long)lmem_map - PAGE_OFFSET));  //物理地址68字节对齐后0xc1000010
  46.     }
  47.     *gmap = pgdat->node_mem_map = lmem_map;                //都等于0xc1000010,这儿将全局变量mem_map设为0xc1000010
  48.     pgdat->node_size = totalpages;                         //0x3fffe
  49.     pgdat->node_start_paddr = zone_start_paddr;            //0x0
  50.     pgdat->node_start_mapnr = (lmem_map - mem_map);        //0x0
  51.     pgdat->nr_zones = 0;

  52.     /*
  53.      * Initially all pages are reserved - free ones are freed
  54.      * up by free_all_bootmem() once the early boot process is
  55.      * done.
  56.      */
  57.     //初始化page链表结构
  58.     for (p = lmem_map; p < lmem_map + totalpages; p++) {
  59.         set_page_count(p, 0);                //初始时所有page的引用计数count=0
  60.         SetPageReserved(p);                  //初始时所有page的flags=reserved
  61.         init_waitqueue_head(&p->wait);       
  62.         memlist_init(&p->list);
  63.     }

  64.     offset = lmem_map - mem_map;                         //这儿offset是0
  65.     for (j = 0; j < MAX_NR_ZONES; j++) {
  66.         //初始化结构体zone中的数据
  67.         zone_t *zone = pgdat->node_zones + j;
  68.         unsigned long mask;
  69.         unsigned long size, realsize;

  70.         realsize = size = zones_size[j];
  71.         if (zholes_size)
  72.             realsize -= zholes_size[j];

  73.         printk("zone(%lu): %lu pages.\n", j, size);
  74.         zone->size = size;
  75.         zone->name = zone_names[j];        //zone_name[0]=DMA,name[1]=NORMAL,name[2]=HIGHMEM
  76.         zone->lock = SPIN_LOCK_UNLOCKED;
  77.         zone->zone_pgdat = pgdat;
  78.         zone->free_pages = 0;
  79.         zone->need_balance = 0;
  80.         if (!size)
  81.             continue;
  82.     
  83.         pgdat->nr_zones = j+1;

  84.         mask = (realsize / zone_balance_ratio[j]);
  85.         if (mask < zone_balance_min[j])
  86.             mask = zone_balance_min[j];
  87.         else if (mask > zone_balance_max[j])
  88.             mask = zone_balance_max[j];
  89.         zone->pages_min = mask;
  90.         zone->pages_low = mask*2;
  91.         zone->pages_high = mask*3;

  92.         zone->zone_mem_map = mem_map + offset;
  93.         zone->zone_start_mapnr = offset;
  94.         zone->zone_start_paddr = zone_start_paddr;

  95.         if ((zone_start_paddr >> PAGE_SHIFT) & (zone_required_alignment-1))
  96.             printk("BUG: wrong zone alignment, it will crash\n");
  97.         //初始化mem_map中的virtual
  98.         //这个循环让该zone内的所有page都指向zone,并让page->virtual指向相应的虚拟地址
  99.         //假如当前是zone[0],第0个page->virtual=0xc0000000,第1个virtual=0xc0001000以此类推
  100.         for (i = 0; i < size; i++) {
  101.             struct page *page = mem_map + offset + i;   //在内存管理区找到相应的page结构体指针
  102.             page->zone = zone;                          //[0-0x1000]的page指向zone->dma [0x1000-896M]的page指向zone->normal
  103.             if (j != ZONE_HIGHMEM)
  104.                 page->virtual = __va(zone_start_paddr);  //virtual=0xc0000000+4K*i
  105.             zone_start_paddr += PAGE_SIZE;
  106.         }

  107.         offset += size;
  108.         for (i = 0; ; i++) {
  109.             unsigned long bitmap_size;

  110.             memlist_init(&zone->free_area[i].free_list);
  111.             if (i == MAX_ORDER-1) {
  112.                 zone->free_area[i].map = NULL;
  113.                 break;
  114.             }
  115.             bitmap_size = (size-1) >> (i+4);
  116.             bitmap_size = LONG_ALIGN(bitmap_size+1);
  117.             zone->free_area[i].map = (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
  118.         }
  119.     }
  120.     build_zonelists(pgdat);
  121. }
上述代码执行后三个zone的信息如下所示:
  1. p /x contig_page_data
  2. DMA={lock = {lock = 0x1, magic = 0xdead4ead}, free_pages = 0x0,(0xd17) pages_min = 0x20, pages_low = 0x40, pages_high = 0x60, need_balance = 0x0, 
  3.  free_area = {
  4.   {free_list = {next = 0xc02d07fc, prev = 0xc02d07fc}, map = 0xc2100000}, {free_list = {next = 0xc02d0808, prev = 0xc02d0808}, map = 0xc2100100},  -->free_list中没有结点其prev与next都指向本身
  5.   {free_list = {next = 0xc02d0814, prev = 0xc02d0814}, map = 0xc2100180}, {free_list = {next = 0xc02d0820, prev = 0xc02d0820}, map = 0xc2100200}, 
  6.   {free_list = {next = 0xc02d082c, prev = 0xc02d082c}, map = 0xc2100280}, {free_list = {next = 0xc02d0838, prev = 0xc02d0838}, map = 0xc2100300}, 
  7.   {free_list = {next = 0xc02d0844, prev = 0xc02d0844}, map = 0xc2100380}, {free_list = {next = 0xc02d0850, prev = 0xc02d0850}, map = 0xc2100400}, 
  8.   {free_list = {next = 0xc02d085c, prev = 0xc02d085c}, map = 0xc2100480}, {free_list = {next = 0xc02d0868, prev = 0xc02d0868}, map = 0x0}}, 
  9.   zone_pgdat = 0xc02d07e0, zone_mem_map = 0xc1000010, zone_start_paddr = 0x0(0M的物理地址), zone_start_mapnr = 0x0, name = 0xc0294ea0 "DMA", size = 0x1000}, 
  10.   
  11. Normal={lock = {lock = 0x1, magic = 0xdead4ead}, free_pages = 0x0,(0x35ef5) pages_min = 0xff, pages_low = 0x1fe, pages_high = 0x2fd, need_balance = 0x0,
  12.  free_area = {
  13.   {free_list = {next = 0xc02d08a8, prev = 0xc02d08a8}, map = 0xc2100500}, {free_list = {next = 0xc02d08b4, prev = 0xc02d08b4}, map = 0xc2103c00},  -->free_list中没有结点其prev与next都指向本身
  14.   {free_list = {next = 0xc02d08c0, prev = 0xc02d08c0}, map = 0xc2105780}, {free_list = {next = 0xc02d08cc, prev = 0xc02d08cc}, map = 0xc2106580}, 
  15.   {free_list = {next = 0xc02d08d8, prev = 0xc02d08d8}, map = 0xc2106c80}, {free_list = {next = 0xc02d08e4, prev = 0xc02d08e4}, map = 0xc2107000}, 
  16.   {free_list = {next = 0xc02d08f0, prev = 0xc02d08f0}, map = 0xc2107200}, {free_list = {next = 0xc02d08fc, prev = 0xc02d08fc}, map = 0xc2107300}, 
  17.   {free_list = {next = 0xc02d0908, prev = 0xc02d0908}, map = 0xc2107380}, {free_list = {next = 0xc02d0914, prev = 0xc02d0914}, map = 0x0}}, 
  18.   zone_pgdat = 0xc02d07e0, zone_mem_map = 0xc1044010, zone_start_paddr = 0x1000000(16M的物理地址), zone_start_mapnr = 0x1000, name = 0xc0294ea4 "Normal", size = 0x37000}, 
  19.   
  20. HighMem={lock = {lock = 0x1, magic = 0xdead4ead}, free_pages = 0x0,(0x7ffe) pages_min = 0xff, pages_low = 0x1fe, pages_high = 0x2fd, need_balance = 0x0, 
  21.  free_area = {
  22.   {free_list = {next = 0xc02d0954, prev = 0xc02d0954}, map = 0xc2107400}, {free_list = {next = 0xc02d0960, prev = 0xc02d0960}, map = 0xc2107c00},  -->free_list中没有结点其prev与next都指向本身
  23.   {free_list = {next = 0xc02d096c, prev = 0xc02d096c}, map = 0xc2108000}, {free_list = {next = 0xc02d0978, prev = 0xc02d0978}, map = 0xc2108200},
  24.   {free_list = {next = 0xc02d0984, prev = 0xc02d0984}, map = 0xc2108300}, {free_list = {next = 0xc02d0990, prev = 0xc02d0990}, map = 0xc2108380}, 
  25.   {free_list = {next = 0xc02d099c, prev = 0xc02d099c}, map = 0xc2108400}, {free_list = {next = 0xc02d09a8, prev = 0xc02d09a8}, map = 0xc2108480}, 
  26.   {free_list = {next = 0xc02d09b4, prev = 0xc02d09b4}, map = 0xc2108500}, {free_list = {next = 0xc02d09c0, prev = 0xc02d09c0}, map = 0x0}}, 
  27.   zone_pgdat = 0xc02d07e0, zone_mem_map = 0xc1ee0010, zone_start_paddr = 0x38000000(896M的物理地址), zone_start_mapnr = 0x38000, name = 0xc0294eab "HighMem", size = 0x7ffe}
注: 上面的free_pages还没有初始化,这个需要等到mem_init中调用free_all_bootmem时才被填充

2.1.4 在mm/page_alloc.c中-->setup_arch-->paging_init-->free_area_init-->build_zonelists
为GFP_ZONEMASK的每个取值构造一个回退(fallback)管理区列表:首选管理区分配失败时,按HighMem-->Normal-->DMA的顺序依次回退,下面来理清楚
  1. static inline void build_zonelists(pg_data_t *pgdat)
  2. {
  3.     int i, j, k;

  4.     for (i = 0; i <= GFP_ZONEMASK; i++) {
  5.         zonelist_t *zonelist;
  6.         zone_t *zone;

  7.         zonelist = pgdat->node_zonelists + i;
  8.         memset(zonelist, 0, sizeof(*zonelist));

  9.         j = 0;
  10.         k = ZONE_NORMAL;
  11.         if (i & __GFP_HIGHMEM)
  12.             k = ZONE_HIGHMEM;
  13.         if (i & __GFP_DMA)
  14.             k = ZONE_DMA;

  15.         switch (k) {
  16.             default:
  17.                 BUG();
  18.             /*
  19.              * fallthrough:
  20.              */
  21.             case ZONE_HIGHMEM:
  22.                 zone = pgdat->node_zones + ZONE_HIGHMEM;
  23.                 if (zone->size) {
  24. #ifndef CONFIG_HIGHMEM
  25.                     BUG();
  26. #endif
  27.                     zonelist->zones[j++] = zone;
  28.                 }
  29.             case ZONE_NORMAL:
  30.                 zone = pgdat->node_zones + ZONE_NORMAL;
  31.                 if (zone->size)
  32.                     zonelist->zones[j++] = zone;
  33.             case ZONE_DMA:
  34.                 zone = pgdat->node_zones + ZONE_DMA;
  35.                 if (zone->size)
  36.                     zonelist->zones[j++] = zone;
  37.         }
  38.         zonelist->zones[j++] = NULL;
  39.     } 
  40. }
2.1.4说明 下面是打印的zonelist的内容
  1. (gdb) p &pgdat->node_zones[0] -->DMA
  2. $26 = (zone_t *) 0xc02d07e0 <contig_page_data>   zone_dma
  3. (gdb) p &pgdat->node_zones[1] -->Normal
  4. $27 = (zone_t *) 0xc02d088c <contig_page_data+172>
  5. (gdb) p &pgdat->node_zones[2] -->HighMem
  6. $28 = (zone_t *) 0xc02d0938 <contig_page_data+344>
  7. 7e0-->7DMA 88C-->8Normal 938-->9HighMem

  8. (gdb) p pgdat->node_zonelists[0]
  9. $35 = {zones = {0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0, 0x0}}                               -->8Normal+7DMA
  10. (gdb) p pgdat->node_zonelists[1]
  11. $36 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}}                                                             -->7DMA
  12. (gdb) p pgdat->node_zonelists[2]
  13. $37 = {zones = {0xc02d0938 <contig_page_data+344>, 0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0}} -->9HighMem+8Normal+7DMA
  14. (gdb) p pgdat->node_zonelists[3]
  15. $38 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}}                                                              -->7DMA
  16. (gdb) p pgdat->node_zonelists[4]
  17. $39 = {zones = {0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0, 0x0}}                               -->8Normal+7DMA
  18. (gdb) p pgdat->node_zonelists[5]
  19. $40 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}}                                                             -->7DMA    
  20. (gdb) p pgdat->node_zonelists[6]
  21. $41 = {zones = {0xc02d0938 <contig_page_data+344>, 0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0}} -->9HighMem+8Normal+7DMA
  22. (gdb) p pgdat->node_zonelists[7]
  23. $42 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}}                                                              -->7DMA  
  24. (gdb) p pgdat->node_zonelists[8]
  25. $43 = {zones = {0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0, 0x0}}                                -->8Normal+7DMA
  26. (gdb) p pgdat->node_zonelists[9]
  27. $44 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}}                                                              -->7DMA
  28. (gdb) p pgdat->node_zonelists[10]
  29. $45 = {zones = {0xc02d0938 <contig_page_data+344>, 0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0}} -->9HighMem+8Normal+7DMA
  30. (gdb) p pgdat->node_zonelists[11]
  31. $46 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}}                                                              -->7DMA 
  32. (gdb) p pgdat->node_zonelists[12]
  33. $47 = {zones = {0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0, 0x0}}                                -->8Normal+7DMA
  34. (gdb) p pgdat->node_zonelists[13]
  35. $48 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}}                                                              -->7DMA
  36. (gdb) p pgdat->node_zonelists[14]
  37. $49 = {zones = {0xc02d0938 <contig_page_data+344>, 0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0}} -->9HighMem+8Normal+7DMA
  38. (gdb) p pgdat->node_zonelists[15]
  39. $50 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}}                                                             -->7DMA
  40. (gdb) p pgdat->node_zonelists[16]
  41. $51 = {zones = {0x3, 0xc1000010, 0x0, 0xc035dc04 <contig_bootmem_data>}}
具体这个函数有什么作用,以后用的时候再添加。
2.1 在arch/i386/mm/init.c中-->start_kernel-->mem_init
  1. void __init mem_init(void)
  2. {
  3.     extern int ppro_with_ram_bug(void);
  4.     int codesize, reservedpages, datasize, initsize;
  5.     int tmp;
  6.     int bad_ppro;

  7.     if (!mem_map)     //mem_map=0xc1000010,从16M开始分配内存空间,物理地址68字节(struct mem_map_t)对齐
  8.         BUG();
  9.     
  10.     bad_ppro = ppro_with_ram_bug();

  11. #ifdef CONFIG_HIGHMEM
  12.     highmem_start_page = mem_map + highstart_pfn;   //高端内存在page管理区的地址=0xc1000010+0x38000*68=0xc1ee0010
  13.     max_mapnr = num_physpages = highend_pfn;        //0x3fffe=可以理解为实际内存容量的页帧
  14. #else
  15.     max_mapnr = num_physpages = max_low_pfn;
  16. #endif
  17.     high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);   //高端内存的虚拟地址=0xf8000000=896M的虚拟地址

  18.     /* clear the zero-page */
  19.     memset(empty_zero_page, 0, PAGE_SIZE);           //将empty_zero_page即0xc0104000-0xc0104000+4K的地址清0
  20.     
  21. //执行后,对可用内存会清掉其reserved属性,并加入到freelist中
  22.     totalram_pages += free_all_bootmem();

  23. //统计reserved的page的数量
  24.     reservedpages = 0;
  25.     for (tmp = 0; tmp < max_low_pfn; tmp++)                 //max_low_pfn=0x38000
  26.         if (page_is_ram(tmp) && PageReserved(mem_map+tmp))  //在e820的map中查看这个页帧是不是属于E820_RAM,并且属性是reserved
  27.             reservedpages++;                                

  28. //执行后,对高端的可用内存清掉其reserved属性,并加入到zone_high的freelist中
  29. #ifdef CONFIG_HIGHMEM
  30.     for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {    //我这儿分配了1G内存,所以highstart_pfn=0x38000,highend_pfn=0x3fffe
  31.         struct page *page = mem_map + tmp;

  32.         if (!page_is_ram(tmp)) {              //如果在e820的map中不属于E820_RAM,说明内存不可用,直接跳过
  33.             SetPageReserved(page);
  34.             continue;
  35.         }
  36.         if (bad_ppro && page_kills_ppro(tmp))
  37.         {
  38.             SetPageReserved(page);
  39.             continue;
  40.         }
  41.         ClearPageReserved(page);
  42.         set_bit(PG_highmem, &page->flags);
  43.         atomic_set(&page->count, 1);
  44.         __free_page(page);
  45.         totalhigh_pages++;
  46.     }
  47.     totalram_pages += totalhigh_pages;
  48. #endif
  49.     codesize = (unsigned long) &_etext - (unsigned long) &_text;
  50.     datasize = (unsigned long) &_edata - (unsigned long) &_etext;
  51.     initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

  52.     printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
  53.         (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
  54.         max_mapnr << (PAGE_SHIFT-10),
  55.         codesize >> 10,
  56.         reservedpages << (PAGE_SHIFT-10),
  57.         datasize >> 10,
  58.         initsize >> 10,
  59.         (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
  60.      );

  61. #if CONFIG_X86_PAE
  62.     if (!cpu_has_pae)
  63.         panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
  64. #endif
  65.     if (boot_cpu_data.wp_works_ok < 0)
  66.         test_wp_bit();

  67.    #ifndef CONFIG_SMP
        zap_low_mappings();
        #endif
  68. }



在mm/bootmem.c中-->mem_init-->free_all_bootmem-->free_all_bootmem_core(&contig_page_data)
  1. static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
  2. {
  3.     struct page *page = pgdat->node_mem_map;    //page=0xc1000010,即mem_map的首地址
  4.     bootmem_data_t *bdata = pgdat->bdata;
  5.     unsigned long i, count, total = 0;
  6.     unsigned long idx;

  7.     if (!bdata->node_bootmem_map) BUG();

  8.     count = 0;
  9.    //bdata={node_boot_start = 0x0, node_low_pfn = 0x38000, node_bootmem_map = 0xc0381000, last_offset = 0x1a0, last_pos = 0x210a}
  10.     idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);  //执行后idx=0x38000

  11. //下面遍历bootmem位图,把其中没有被标记为已使用的page逐个释放给buddy
  12.     for (i = 0; i < idx; i++, page++) {
  13.         if (!test_bit(i, bdata->node_bootmem_map)) {      //对于bootm中没有使用的内存让buddy接管
  14.             count++;
  15.             ClearPageReserved(page);      //在mem_map初始化时对每个page设置了reserved标志,这儿清这个标志位
  16.             set_page_count(page, 1);      //这儿设为1而不是0:__free_page里的put_page_testzero会先把count减1,减到0才真正释放
  17.             __free_page(page);            //将page放到buddy中去
  18.         }
  19.     }
  20.     total += count;

  21. //下面释放bootmem位图本身占的pages
  22.     page = virt_to_page(bdata->node_bootmem_map);
  23.     count = 0;
  24.     for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) {
  25.         count++;
  26.         ClearPageReserved(page);
  27.         set_page_count(page, 1);
  28.         __free_page(page);
  29.     }
  30.     total += count;
  31.     bdata->node_bootmem_map = NULL;

  32.     return total;
  33. }


__free_page(page);
-->#define __free_page(page) __free_pages((page), 0)
  1. void __free_pages(struct page *page, unsigned int order)
  2. {
  3.     //put_page_testzero会把page->count减1,所以刚才是set_page_count=1
  4.     if (!PageReserved(page) && put_page_testzero(page))      
  5.         __free_pages_ok(page, order);
  6. }
  7. //把p->count减1,然后看这个p->count是不是等于0
  8. //return true if the result is 0
  9. #define put_page_testzero(p) atomic_dec_and_test(&(p)->count) 

在mm/page_alloc.c中L66
  1. static void __free_pages_ok (struct page *page, unsigned int order)
  2. {
  3.     unsigned long index, page_idx, mask, flags;
  4.     free_area_t *area;
  5.     struct page *base;
  6.     zone_t *zone;

  7.     /* Yes, think what happens when other parts of the kernel take
  8.      * a reference to a page in order to pin it for io. -ben
  9.      */
  10.     if (PageLRU(page))
  11.         lru_cache_del(page);

  12.     if (page->buffers)
  13.         BUG();
  14.     if (page->mapping)
  15.         BUG();
  16.     if (!VALID_PAGE(page))
  17.         BUG();
  18.     if (PageSwapCache(page))
  19.         BUG();
  20.     if (PageLocked(page))
  21.         BUG();
  22.     if (PageLRU(page))
  23.         BUG();
  24.     if (PageActive(page))
  25.         BUG();
  26.     page->flags &= ~((1<<PG_referenced) | (1<<PG_dirty));   //清除dirty标志

  27.     if (current->flags & PF_FREE_PAGES)
  28.         goto local_freelist;
  29.  back_local_freelist:

  30.     zone = page->zone;    //在mem_map初始化(free_area_init_core)时,就把每个page所指向的zone指定好了

  31.     mask = (~0UL) << order;
  32.     base = zone->zone_mem_map;       //base就是zone在mem_map的地址,例zone_dma就是mem_map的首地址0xc1000010
  33.     page_idx = page - base;          //查看page在zone的第几项
  34.     if (page_idx & ~mask)
  35.         BUG();
  36.     index = page_idx >> (1 + order);

  37.     area = zone->free_area + order;

  38.     spin_lock_irqsave(&zone->lock, flags);

  39.     zone->free_pages -= mask;                 //mask=(~0UL)<<order,按补码看就是-(1<<order),减去mask相当于zone->free_pages加上2^order;这里order=0,所以每次调用__free_pages_ok,zone->free_pages会加1

  40.     while (mask + (1 << (MAX_ORDER-1))) {        //这个过程用文字不好解释,直接看下面的图了
  41.         struct page *buddy1, *buddy2;

  42.         if (area >= zone->free_area + MAX_ORDER)
  43.             BUG();
  44.         if (!__test_and_change_bit(index, area->map))    
  45.             break;                                       
  46.         /*
  47.          * Move the buddy up one level.
  48.          */
  49.         buddy1 = base + (page_idx ^ -mask);
  50.         buddy2 = base + page_idx;
  51.         if (BAD_RANGE(zone,buddy1))
  52.             BUG();
  53.         if (BAD_RANGE(zone,buddy2))
  54.             BUG();

  55.         memlist_del(&buddy1->list);
  56.         mask <<= 1;
  57.         area++;
  58.         index >>= 1;
  59.         page_idx &= mask;
  60.     }
  61.     memlist_add_head(&(base + page_idx)->list, &area->free_list); //放到free_list中去

  62.     spin_unlock_irqrestore(&zone->lock, flags);
  63.     return;

  64.  local_freelist:
  65.     if (current->nr_local_pages)
  66.         goto back_local_freelist;
  67.     if (in_interrupt())
  68.         goto back_local_freelist;        

  69.     list_add(&page->list, &current->local_pages);
  70.     page->index = order;
  71.     current->nr_local_pages++;
  72. }

下面是生成free_list的过程:






最终的链表:

zone中只保存list的head,例order_9的head是保存在zone->free_area[9].free_list中的,对于head的prev与next是保存在page的list中的,
所以说zone的free_area[n].free_list 再加上 相应page中的list构成了一条完整的list
附录1. __free_pages_ok
关于unsigned long取反
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. int main ( int argc, char *argv[] )
  4. {
  5.     unsigned long mask;
  6.     mask = (~0UL);
  7.     printf("mask=%ld\n", mask);       //mask=-1
  8.     mask = (~0UL)<<1;
  9.     printf("mask=%ld\n", mask);       //mask=-2
  10.     mask = (~0UL)<<2;
  11.     printf("mask=%ld\n", mask);       //mask=-4
  12.     return EXIT_SUCCESS;
  13. }
附录2: 
  1. static __inline__ int __test_and_change_bit(int nr, volatile void * addr)
  2. {
  3.     int oldbit;
  4.     __asm__ __volatile__(
  5.         "btcl %2,%1\n\tsbbl %0,%0"
  6.         :"=r" (oldbit),"=m" (ADDR)
  7.         :"Ir" (nr) : "memory");
  8.     return oldbit;
  9. }
BTC %2, %1  -->%1是addr   %2是nr
先把%1对应的内存地址的第%2位的值填入EFLAGS寄存器的进位标志CF,然后把该位反转
随后的sbbl %0,%0计算oldbit = oldbit - oldbit - CF = -CF
原先addr的nr位是1,则返回-1(非0,即为真),并将nr位反转
原先addr的nr位是0,则返回0,并将nr位反转





阅读(1990) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~