一. 总体说明
1.1 页表的初始化都是在paging_init中
start_kernel --> setup_arch
--> paging_init
{
a.free_area_init_core中在物理地址16M(0x1000000,对应的虚拟地址是0xC1000000)处,建立了mem_map,用page结构体去管理内存
a.1 刚开始时设置page_count=0
a.2 设置flag=reserved
a.3 初始化zone结构体,使mem_map中virtual都指向相应的虚拟地址
b.free_all_bootmem_core(注意:它不是在paging_init中,而是之后由mem_init-->free_all_bootmem调用,把bootmem中没有使用的页交给buddy接管)
}
二.代码分析
在arch/i386/mm/init.c中-->start_kernel-->setup_arch-->paging_init
-
/*
 * paging_init - finish kernel paging setup and size the memory zones.
 *
 * Builds the page directory, loads it into %cr3, optionally turns on PAE,
 * flushes the TLB and then passes the per-zone page counts on to
 * free_area_init().
 *
 * The concrete values quoted in the comments were observed by the author
 * under QEMU with 1G of RAM.
 */
void __init paging_init(void)
{
	pagetable_init();	/* 2.1: fill in the page directory */

	/*
	 * Put the physical address of swapper_pg_dir (virtual 0xc0101000 in
	 * the trace) into %cr3, telling the CPU where the page directory is.
	 */
	__asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir)));

#if CONFIG_X86_PAE
	/*
	 * We will bail out later - printk does not work yet, so the user
	 * would just see a hanging kernel.
	 */
	if (cpu_has_pae)
		set_in_cr4(X86_CR4_PAE);
#endif

	/* Flush so that the modified page directory takes effect. */
	__flush_tlb_all();

#ifdef CONFIG_HIGHMEM
	/* Trace: kmap_vstart=0xffff5000, kmap_pte=0xc0003fd4, kmap_prot=0x163 */
	kmap_init();
#endif

	/* Compute the size, in pages, of each of the three zones. */
	{
		/* MAX_NR_ZONES = 3: DMA, NORMAL, HIGHMEM */
		unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
		unsigned int dma_pfn, low_pfn, high_pfn;

		/* MAX_DMA_ADDRESS is 16M; its physical page frame is 0x1000. */
		dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
		/* Page frame of 896M = 0x38000. */
		low_pfn = max_low_pfn;
		/* Page frame of the end of real memory = 0x3fffe with 1G. */
		high_pfn = highend_pfn;

		if (low_pfn >= dma_pfn) {
			zones_size[ZONE_DMA] = dma_pfn;
			zones_size[ZONE_NORMAL] = low_pfn - dma_pfn;
#ifdef CONFIG_HIGHMEM
			/* Result: zones_size = {0x1000, 0x37000, 0x7ffe} pages. */
			zones_size[ZONE_HIGHMEM] = high_pfn - low_pfn;
#endif
		} else {
			/* All of low memory fits below the DMA limit. */
			zones_size[ZONE_DMA] = low_pfn;
		}

		free_area_init(zones_size);	/* initialise every zone */
	}
}
2.1.3最后mm/page_alloc.c中-->paging_init-->free_area_init-->free_area_init_core
函数的作用是:初始化zone结构体,使mem_map中virtual都指向相应的虚拟地址
-
/*
-
* Set up the zone data structures:
-
* - mark all pages reserved
-
* - mark all memory queues empty
-
* - clear the memory bitmaps
-
*/
-
//zones_size={0x1000, 0x37000, 0x7ffe},都是用size表示的
-
void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
-
unsigned long *zones_size, unsigned long zone_start_paddr,
-
unsigned long *zholes_size, struct page *lmem_map)
-
{
-
struct page *p;
-
unsigned long i, j;
-
unsigned long map_size;
-
unsigned long totalpages, offset, realtotalpages;
-
const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
-
-
if (zone_start_paddr & ~PAGE_MASK)
-
BUG();
-
//计算DMA+NOMAL+HIGHMEM中所有的页帧数=0x1000+0x37000+0x7FFE=0x3fffe
-
totalpages = 0;
-
for (i = 0; i < MAX_NR_ZONES; i++) {
-
unsigned long size = zones_size[i];
-
totalpages += size;
-
}
-
realtotalpages = totalpages;
-
if (zholes_size) //如果有holes(洞),则去掉holes的部分。我这儿没有holes
-
for (i = 0; i < MAX_NR_ZONES; i++)
-
realtotalpages -= zholes_size[i];
-
-
printk("On node %d totalpages: %lu\n", nid, realtotalpages);
-
-
INIT_LIST_HEAD(&active_list);
-
INIT_LIST_HEAD(&inactive_list);
-
-
/*
-
* Some architectures (with lots of mem and discontinous memory
-
* maps) have to search for a good mem_map area:
-
* For discontigmem, the conceptual mem map array starts from
-
* PAGE_OFFSET, we need to align the actual array onto a mem map
-
* boundary, so that MAP_NR works.
-
*/
-
//执行后map_size=0x3FFFF*68=262143*68=17825724=0x10fffbc=16.995M的内存
-
//需要分配的内存是0x10fffbc/4096=4351.98页内存
-
map_size = (totalpages + 1)*sizeof(struct page);
-
if (lmem_map == (struct page *)0) {
-
lmem_map = (struct page *) alloc_bootmem_node(pgdat, map_size); //执行后lmem_map=0xc1000000,从16M开始分配544*8页内存
-
lmem_map = (struct page *)(PAGE_OFFSET + MAP_ALIGN((unsigned long)lmem_map - PAGE_OFFSET)); //物理地址68字节对齐后0xc1000010
-
}
-
*gmap = pgdat->node_mem_map = lmem_map; //都等于0xc1000010,这儿将全局变量mem_map设为0xc1000010
-
pgdat->node_size = totalpages; //0x3fffe
-
pgdat->node_start_paddr = zone_start_paddr; //0x0
-
pgdat->node_start_mapnr = (lmem_map - mem_map); //0x0
-
pgdat->nr_zones = 0;
-
-
/*
-
* Initially all pages are reserved - free ones are freed
-
* up by free_all_bootmem() once the early boot process is
-
* done.
-
*/
-
//初始化page链表结构ww
-
for (p = lmem_map; p < lmem_map + totalpages; p++) {
-
set_page_count(p, 0); //初始时所有页表使用count=0
-
SetPageReserved(p); //初始时所有页表的flags=reserved
-
init_waitqueue_head(&p->wait);
-
memlist_init(&p->list);
-
}
-
-
offset = lmem_map - mem_map; //这儿offset是0
-
for (j = 0; j < MAX_NR_ZONES; j++) {
-
//初始化结构体zone中的数据
-
zone_t *zone = pgdat->node_zones + j;
-
unsigned long mask;
-
unsigned long size, realsize;
-
-
realsize = size = zones_size[j];
-
if (zholes_size)
-
realsize -= zholes_size[j];
-
-
printk("zone(%lu): %lu pages.\n", j, size);
-
zone->size = size;
-
zone->name = zone_names[j]; //zone_name[0]=DMA,name[1]=NORMAL,name[2]=HIGHMEM
-
zone->lock = SPIN_LOCK_UNLOCKED;
-
zone->zone_pgdat = pgdat;
-
zone->free_pages = 0;
-
zone->need_balance = 0;
-
if (!size)
-
continue;
-
-
pgdat->nr_zones = j+1;
-
-
mask = (realsize / zone_balance_ratio[j]);
-
if (mask < zone_balance_min[j])
-
mask = zone_balance_min[j];
-
else if (mask > zone_balance_max[j])
-
mask = zone_balance_max[j];
-
zone->pages_min = mask;
-
zone->pages_low = mask*2;
-
zone->pages_high = mask*3;
-
-
zone->zone_mem_map = mem_map + offset;
-
zone->zone_start_mapnr = offset;
-
zone->zone_start_paddr = zone_start_paddr;
-
-
if ((zone_start_paddr >> PAGE_SHIFT) & (zone_required_alignment-1))
-
printk("BUG: wrong zone alignment, it will crash\n");
-
//初始化mem_map中的virtual
-
//这个循环让该zone内的所有page都指向zone,并让page->virtaul指向相应的虚拟地址
-
//假如当前是zone[0],第0个page->virtual=0xc0000000,第1个virtual=0xc0001000以此类推
-
for (i = 0; i < size; i++) {
-
struct page *page = mem_map + offset + i; //在内存管理区找到相应的page结构体指针
-
page->zone = zone; //[0-0x1000]的page指向zone->dma [0x1000-896M]的page指向zone->normal
-
if (j != ZONE_HIGHMEM)
-
page->virtual = __va(zone_start_paddr); //virtual=0xc0000000+4K*i
-
zone_start_paddr += PAGE_SIZE;
-
}
-
-
offset += size;
-
for (i = 0; ; i++) {
-
unsigned long bitmap_size;
-
-
memlist_init(&zone->free_area[i].free_list);
-
if (i == MAX_ORDER-1) {
-
zone->free_area[i].map = NULL;
-
break;
-
}
-
bitmap_size = (size-1) >> (i+4);
-
bitmap_size = LONG_ALIGN(bitmap_size+1);
-
zone->free_area[i].map = (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
-
}
-
}
-
build_zonelists(pgdat);
-
}
上述代码执行后三个zone的信息如下所示:
-
p /x contig_page_data
-
DMA={lock = {lock = 0x1, magic = 0xdead4ead}, free_pages = 0x0,(0xd17) pages_min = 0x20, pages_low = 0x40, pages_high = 0x60, need_balance = 0x0,
-
free_area = {
-
{free_list = {next = 0xc02d07fc, prev = 0xc02d07fc}, map = 0xc2100000}, {free_list = {next = 0xc02d0808, prev = 0xc02d0808}, map = 0xc2100100}, -->free_list中没有结点其prev与next都指向本身
-
{free_list = {next = 0xc02d0814, prev = 0xc02d0814}, map = 0xc2100180}, {free_list = {next = 0xc02d0820, prev = 0xc02d0820}, map = 0xc2100200},
-
{free_list = {next = 0xc02d082c, prev = 0xc02d082c}, map = 0xc2100280}, {free_list = {next = 0xc02d0838, prev = 0xc02d0838}, map = 0xc2100300},
-
{free_list = {next = 0xc02d0844, prev = 0xc02d0844}, map = 0xc2100380}, {free_list = {next = 0xc02d0850, prev = 0xc02d0850}, map = 0xc2100400},
-
{free_list = {next = 0xc02d085c, prev = 0xc02d085c}, map = 0xc2100480}, {free_list = {next = 0xc02d0868, prev = 0xc02d0868}, map = 0x0}},
-
zone_pgdat = 0xc02d07e0, zone_mem_map = 0xc1000010, zone_start_paddr = 0x0(0M的物理地址), zone_start_mapnr = 0x0, name = 0xc0294ea0 "DMA", size = 0x1000},
-
-
Normal={lock = {lock = 0x1, magic = 0xdead4ead}, free_pages = 0x0,(0x35ef5) pages_min = 0xff, pages_low = 0x1fe, pages_high = 0x2fd, need_balance = 0x0,
-
free_area = {
-
{free_list = {next = 0xc02d08a8, prev = 0xc02d08a8}, map = 0xc2100500}, {free_list = {next = 0xc02d08b4, prev = 0xc02d08b4}, map = 0xc2103c00}, -->free_list中没有结点其prev与next都指向本身
-
{free_list = {next = 0xc02d08c0, prev = 0xc02d08c0}, map = 0xc2105780}, {free_list = {next = 0xc02d08cc, prev = 0xc02d08cc}, map = 0xc2106580},
-
{free_list = {next = 0xc02d08d8, prev = 0xc02d08d8}, map = 0xc2106c80}, {free_list = {next = 0xc02d08e4, prev = 0xc02d08e4}, map = 0xc2107000},
-
{free_list = {next = 0xc02d08f0, prev = 0xc02d08f0}, map = 0xc2107200}, {free_list = {next = 0xc02d08fc, prev = 0xc02d08fc}, map = 0xc2107300},
-
{free_list = {next = 0xc02d0908, prev = 0xc02d0908}, map = 0xc2107380}, {free_list = {next = 0xc02d0914, prev = 0xc02d0914}, map = 0x0}},
-
zone_pgdat = 0xc02d07e0, zone_mem_map = 0xc1044010, zone_start_paddr = 0x1000000(16M的物理地址), zone_start_mapnr = 0x1000, name = 0xc0294ea4 "Normal", size = 0x37000},
-
-
HighMem={lock = {lock = 0x1, magic = 0xdead4ead}, free_pages = 0x0,(0x7ffe) pages_min = 0xff, pages_low = 0x1fe, pages_high = 0x2fd, need_balance = 0x0,
-
free_area = {
-
{free_list = {next = 0xc02d0954, prev = 0xc02d0954}, map = 0xc2107400}, {free_list = {next = 0xc02d0960, prev = 0xc02d0960}, map = 0xc2107c00}, -->free_list中没有结点其prev与next都指向本身
-
{free_list = {next = 0xc02d096c, prev = 0xc02d096c}, map = 0xc2108000}, {free_list = {next = 0xc02d0978, prev = 0xc02d0978}, map = 0xc2108200},
-
{free_list = {next = 0xc02d0984, prev = 0xc02d0984}, map = 0xc2108300}, {free_list = {next = 0xc02d0990, prev = 0xc02d0990}, map = 0xc2108380},
-
{free_list = {next = 0xc02d099c, prev = 0xc02d099c}, map = 0xc2108400}, {free_list = {next = 0xc02d09a8, prev = 0xc02d09a8}, map = 0xc2108480},
-
{free_list = {next = 0xc02d09b4, prev = 0xc02d09b4}, map = 0xc2108500}, {free_list = {next = 0xc02d09c0, prev = 0xc02d09c0}, map = 0x0}},
-
zone_pgdat = 0xc02d07e0, zone_mem_map = 0xc1ee0010, zone_start_paddr = 0x38000000(896M的物理地址), zone_start_mapnr = 0x38000, name = 0xc0294eab "HighMem", size = 0x7ffe}
注: 上面的free_pages还没有初始化,这个需要等到mem_init中调用free_all_bootmem时才被填充
2.1.4 在mm/page_alloc.c中-->setup_arch-->paging_init-->free_area_init-->free_area_init_core-->build_zonelists
为每一种GFP管理区标志组合构造一个回退(fallback)管理区列表:首选管理区分配不到内存时,按HighMem-->Normal-->DMA的顺序依次回退。下面看代码:
-
/*
 * build_zonelists - build the fallback zone list for every GFP zone mask.
 *
 * For each value 0..GFP_ZONEMASK the list starts at the highest zone the
 * mask allows (NORMAL by default, HIGHMEM if __GFP_HIGHMEM is set, DMA if
 * __GFP_DMA is set - DMA wins when both are set) and then continues with
 * every lower zone. Zones whose size is 0 are left out, and the list is
 * terminated with NULL.
 */
static inline void build_zonelists(pg_data_t *pgdat)
{
	int gfp;

	for (gfp = 0; gfp <= GFP_ZONEMASK; gfp++) {
		zonelist_t *zonelist = &pgdat->node_zonelists[gfp];
		zone_t *zone;
		int first = ZONE_NORMAL;	/* highest zone allowed */
		int n = 0;			/* next free slot in zones[] */

		memset(zonelist, 0, sizeof(*zonelist));

		if (gfp & __GFP_HIGHMEM)
			first = ZONE_HIGHMEM;
		if (gfp & __GFP_DMA)
			first = ZONE_DMA;

		switch (first) {
			default:
				BUG();
			/* fallthrough */
			case ZONE_HIGHMEM:
				zone = &pgdat->node_zones[ZONE_HIGHMEM];
				if (zone->size) {
#ifndef CONFIG_HIGHMEM
					/* A populated highmem zone without highmem support. */
					BUG();
#endif
					zonelist->zones[n++] = zone;
				}
			/* fallthrough */
			case ZONE_NORMAL:
				zone = &pgdat->node_zones[ZONE_NORMAL];
				if (zone->size)
					zonelist->zones[n++] = zone;
			/* fallthrough */
			case ZONE_DMA:
				zone = &pgdat->node_zones[ZONE_DMA];
				if (zone->size)
					zonelist->zones[n++] = zone;
		}
		zonelist->zones[n] = NULL;
	}
}
2.1.4说明 下面是打印的zonelist的内容
-
(gdb) p &pgdat->node_zones[0] -->DMA
-
$26 = (zone_t *) 0xc02d07e0 <contig_page_data> zone_dma
-
(gdb) p &pgdat->node_zones[1] -->Normal
-
$27 = (zone_t *) 0xc02d088c <contig_page_data+172>
-
(gdb) p &pgdat->node_zones[2] -->HighMem
-
$28 = (zone_t *) 0xc02d0938 <contig_page_data+344>
-
下面用地址的第三位十六进制数字作为三个zone的简写: 7e0-->7DMA 88c-->8Normal 938-->9HighMem
-
-
(gdb) p pgdat->node_zonelists[0]
-
$35 = {zones = {0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0, 0x0}} -->8Normal+7DMA
-
(gdb) p pgdat->node_zonelists[1]
-
$36 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}} -->7DMA
-
(gdb) p pgdat->node_zonelists[2]
-
$37 = {zones = {0xc02d0938 <contig_page_data+344>, 0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0}} -->9HighMem+8Normal+7DMA
-
(gdb) p pgdat->node_zonelists[3]
-
$38 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}} -->7DMA
-
(gdb) p pgdat->node_zonelists[4]
-
$39 = {zones = {0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0, 0x0}} -->8Normal+7DMA
-
(gdb) p pgdat->node_zonelists[5]
-
$40 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}} -->7DMA
-
(gdb) p pgdat->node_zonelists[6]
-
$41 = {zones = {0xc02d0938 <contig_page_data+344>, 0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0}} -->9HighMem+8Normal+7DMA
-
(gdb) p pgdat->node_zonelists[7]
-
$42 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}} -->7DMA
-
(gdb) p pgdat->node_zonelists[8]
-
$43 = {zones = {0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0, 0x0}} -->8Normal+7DMA
-
(gdb) p pgdat->node_zonelists[9]
-
$44 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}} -->7DMA
-
(gdb) p pgdat->node_zonelists[10]
-
$45 = {zones = {0xc02d0938 <contig_page_data+344>, 0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0}} -->9HighMem+8Normal+7DMA
-
(gdb) p pgdat->node_zonelists[11]
-
$46 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}} -->7DMA
-
(gdb) p pgdat->node_zonelists[12]
-
$47 = {zones = {0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0, 0x0}} -->8Normal+7DMA
-
(gdb) p pgdat->node_zonelists[13]
-
$48 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}} -->7DMA
-
(gdb) p pgdat->node_zonelists[14]
-
$49 = {zones = {0xc02d0938 <contig_page_data+344>, 0xc02d088c <contig_page_data+172>, 0xc02d07e0 <contig_page_data>, 0x0}} -->9HighMem+8Normal+7DMA
-
(gdb) p pgdat->node_zonelists[15]
-
$50 = {zones = {0xc02d07e0 <contig_page_data>, 0x0, 0x0, 0x0}} -->7DMA
-
(gdb) p pgdat->node_zonelists[16]
-
$51 = {zones = {0x3, 0xc1000010, 0x0, 0xc035dc04 <contig_bootmem_data>}}
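注: 上面的node_zonelists[16]应该已经越界了(有效下标是0到GFP_ZONEMASK,从打印看即0-15),打印出来的0x3、0xc1000010等看起来是pg_data_t中紧跟在后面的成员(nr_zones、node_mem_map等),不是zonelist。
具体这个函数有什么作用,以后用的时候再添加。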
2.1 在arch/i386/mm/init.c中-->start_kernel-->mem_init
-
/*
 * mem_init - hand boot memory over to the page allocator and report totals.
 *
 * Releases all unused bootmem pages, counts the RAM pages in low memory
 * that are still reserved, releases the usable highmem pages (when
 * CONFIG_HIGHMEM is set) and prints the "Memory:" summary line.
 *
 * Values quoted in the comments come from the author's trace (QEMU, 1G).
 */
void __init mem_init(void)
{
	extern int ppro_with_ram_bug(void);
	int codesize, datasize, initsize;
	int reservedpages = 0;
	int bad_ppro;
	int tmp;

	/*
	 * mem_map = 0xc1000010: allocated starting at 16M, with the physical
	 * address aligned to 68 bytes (the size of mem_map_t).
	 */
	if (!mem_map)
		BUG();

	bad_ppro = ppro_with_ram_bug();

#ifdef CONFIG_HIGHMEM
	/* First highmem struct page: 0xc1000010 + 0x38000*68 = 0xc1ee0010. */
	highmem_start_page = mem_map + highstart_pfn;
	/* 0x3fffe: effectively the page frame count of real memory. */
	max_mapnr = num_physpages = highend_pfn;
#else
	max_mapnr = num_physpages = max_low_pfn;
#endif
	/* Virtual address where high memory begins: 0xf8000000 (896M). */
	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);

	/* clear the zero-page (0xc0104000 .. 0xc0104000+4K in the trace) */
	memset(empty_zero_page, 0, PAGE_SIZE);

	/*
	 * After this call the usable pages have lost their reserved flag
	 * and sit on the free lists.
	 */
	totalram_pages += free_all_bootmem();

	/* Count the low-memory pages that are RAM but still reserved. */
	for (tmp = 0; tmp < max_low_pfn; tmp++) {	/* max_low_pfn = 0x38000 */
		/* Is this frame E820_RAM according to the e820 map? */
		if (!page_is_ram(tmp))
			continue;
		if (PageReserved(mem_map+tmp))
			reservedpages++;
	}

#ifdef CONFIG_HIGHMEM
	/*
	 * Release the usable highmem pages: clear the reserved flag and put
	 * them on the highmem zone's free lists.
	 * Trace with 1G: highstart_pfn = 0x38000, highend_pfn = 0x3fffe.
	 */
	for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
		struct page *page = mem_map + tmp;

		/*
		 * Frames that are not E820_RAM, or that hit the PPro erratum,
		 * stay reserved and are skipped.
		 */
		if (!page_is_ram(tmp) || (bad_ppro && page_kills_ppro(tmp))) {
			SetPageReserved(page);
			continue;
		}

		ClearPageReserved(page);
		set_bit(PG_highmem, &page->flags);
		/* Count of 1 so that __free_page() drops it to 0 and frees it. */
		atomic_set(&page->count, 1);
		__free_page(page);
		totalhigh_pages++;
	}
	totalram_pages += totalhigh_pages;
#endif

	codesize = (unsigned long) &_etext - (unsigned long) &_text;
	datasize = (unsigned long) &_edata - (unsigned long) &_etext;
	initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

	printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		max_mapnr << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10,
		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
	       );

#if CONFIG_X86_PAE
	if (!cpu_has_pae)
		panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
#endif
	if (boot_cpu_data.wp_works_ok < 0)
		test_wp_bit();

#ifndef CONFIG_SMP
	zap_low_mappings();
#endif
}
在mm/bootmem.c中-->mem_init-->free_all_bootmem-->free_all_bootmem_core(&contig_page_data)
-
/*
 * free_all_bootmem_core - release a node's bootmem pages to the page allocator.
 *
 * Every page whose bit is clear in the bootmem bitmap is released, then the
 * pages holding the bitmap itself are released and the bitmap pointer is
 * cleared. Returns the total number of pages released.
 *
 * Trace values: bdata = {node_boot_start = 0x0, node_low_pfn = 0x38000,
 * node_bootmem_map = 0xc0381000, last_offset = 0x1a0, last_pos = 0x210a}.
 */
static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
{
	bootmem_data_t *bdata = pgdat->bdata;
	struct page *page = pgdat->node_mem_map;	/* 0xc1000010, start of mem_map */
	unsigned long nr_pfns, bitmap_pages;
	unsigned long i, total = 0;

	if (!bdata->node_bootmem_map)
		BUG();

	/* Number of page frames covered by the bitmap: 0x38000. */
	nr_pfns = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);

	/* Pass 1: let the buddy allocator take over every page bootmem never used. */
	for (i = 0; i < nr_pfns; i++, page++) {
		if (test_bit(i, bdata->node_bootmem_map))
			continue;
		/* Undo the reserved flag set when mem_map was initialised. */
		ClearPageReserved(page);
		/*
		 * Count is 1 rather than 0 because __free_page() decrements
		 * it and only frees the page when it reaches 0.
		 */
		set_page_count(page, 1);
		__free_page(page);		/* put the page into the buddy lists */
		total++;
	}

	/* Pass 2: release the pages occupied by the bootmem bitmap itself. */
	page = virt_to_page(bdata->node_bootmem_map);
	bitmap_pages = (nr_pfns/8 + PAGE_SIZE-1)/PAGE_SIZE;
	for (i = 0; i < bitmap_pages; i++, page++) {
		ClearPageReserved(page);
		set_page_count(page, 1);
		__free_page(page);
		total++;
	}
	bdata->node_bootmem_map = NULL;

	return total;
}
__free_page(page);
-->#define __free_page(page) __free_pages((page), 0)
-
/*
 * __free_pages - drop one reference to a block of 2^order pages.
 *
 * Reserved pages are never freed. Otherwise the reference count is
 * decremented and the block goes to __free_pages_ok() once it reaches 0,
 * which is why callers releasing boot pages first set the count to 1.
 */
void __free_pages(struct page *page, unsigned int order)
{
	if (PageReserved(page))
		return;
	if (put_page_testzero(page))
		__free_pages_ok(page, order);
}

/*
 * Decrement p->count and test the result:
 * return true if the result is 0.
 */
#define put_page_testzero(p) atomic_dec_and_test(&(p)->count)
在mm/page_alloc.c中L66
-
static void __free_pages_ok (struct page *page, unsigned int order)
-
{
-
unsigned long index, page_idx, mask, flags;
-
free_area_t *area;
-
struct page *base;
-
zone_t *zone;
-
-
/* Yes, think what happens when other parts of the kernel take
-
* a reference to a page in order to pin it for io. -ben
-
*/
-
if (PageLRU(page))
-
lru_cache_del(page);
-
-
if (page->buffers)
-
BUG();
-
if (page->mapping)
-
BUG();
-
if (!VALID_PAGE(page))
-
BUG();
-
if (PageSwapCache(page))
-
BUG();
-
if (PageLocked(page))
-
BUG();
-
if (PageLRU(page))
-
BUG();
-
if (PageActive(page))
-
BUG();
-
page->flags &= ~((1<<PG_referenced) | (1<<PG_dirty)); //清除dirty标志
-
-
if (current->flags & PF_FREE_PAGES)
-
goto local_freelist;
-
back_local_freelist:
-
-
zone = page->zone; //在mem_map初始化free_area_init_core>时,就把每个page所指向的zone指定好了
-
-
mask = (~0UL) << order;
-
base = zone->zone_mem_map; //base就是zone在mem_map的地址,例zone_dma就是mem_map的首地址0xc1000010
-
page_idx = page - base; //查看page在zone的第几项
-
if (page_idx & ~mask)
-
BUG();
-
index = page_idx >> (1 + order);
-
-
area = zone->free_area + order;
-
-
spin_lock_irqsave(&zone->lock, flags);
-
-
zone->free_pages -= mask; //mask是unsigned long,以补码形式存在的-1,所以每次调用__free_page_ok,则zone->free_pages会加1
-
-
while (mask + (1 << (MAX_ORDER-1))) { //这个过程用文字不好解释,直接看下面的图了
-
struct page *buddy1, *buddy2;
-
-
if (area >= zone->free_area + MAX_ORDER)
-
BUG();
-
if (!__test_and_change_bit(index, area->map))
-
break;
-
/*
-
* Move the buddy up one level.
-
*/
-
buddy1 = base + (page_idx ^ -mask);
-
buddy2 = base + page_idx;
-
if (BAD_RANGE(zone,buddy1))
-
BUG();
-
if (BAD_RANGE(zone,buddy2))
-
BUG();
-
-
memlist_del(&buddy1->list);
-
mask <<= 1;
-
area++;
-
index >>= 1;
-
page_idx &= mask;
-
}
-
memlist_add_head(&(base + page_idx)->list, &area->free_list); //放到free_list中去
-
-
spin_unlock_irqrestore(&zone->lock, flags);
-
return;
-
-
local_freelist:
-
if (current->nr_local_pages)
-
goto back_local_freelist;
-
if (in_interrupt())
-
goto back_local_freelist;
-
-
list_add(&page->list, ¤t->local_pages);
-
page->index = order;
-
current->nr_local_pages++;
-
}
下面是生成free_list的过程:
最终的链表:
zone中只保存list的head,例order_9的head是保存在zone->free_area[9].free_list中的,对于head的prev与next是保存在page的list中的,
所以说zone的free_area[n].free_list 再加上 相应page中的list构成了一条完整的list
附录1. __free_pages_ok
关于unsigned long取反
-
#include <stdio.h>
#include <stdlib.h>

/*
 * Appendix demo: show that (~0UL) << order, viewed as a signed long, is
 * -(1 << order). This is why "free_pages -= mask" in __free_pages_ok adds
 * 1 << order pages.
 *
 * The values are converted to long explicitly because "%ld" requires a
 * signed long argument; passing an unsigned long directly is a
 * specifier/argument mismatch.
 */
int main ( int argc, char *argv[] )
{
	unsigned long mask;

	(void)argc;
	(void)argv;

	mask = (~0UL);
	printf("mask=%ld\n", (long)mask);	/* mask=-1 */

	mask = (~0UL)<<1;
	printf("mask=%ld\n", (long)mask);	/* mask=-2 */

	mask = (~0UL)<<2;
	printf("mask=%ld\n", (long)mask);	/* mask=-4 */

	return EXIT_SUCCESS;
}
附录2:
-
/*
 * __test_and_change_bit - toggle bit nr at addr and report its old value.
 *
 * Returns 0 if the bit was clear before the call and a non-zero value
 * (all bits set, i.e. -1) if it was set. There is no lock prefix on the
 * instruction.
 *
 * NOTE(review): ADDR is not defined in this excerpt; presumably it is a
 * macro that dereferences addr - confirm against the full header.
 * NOTE(review): the memory operand is declared "=m" (write-only) although
 * btcl both reads and writes it; the "memory" clobber appears to be what
 * keeps this safe - confirm.
 */
static __inline__ int __test_and_change_bit(int nr, volatile void * addr)
-
{
-
int oldbit;
-
__asm__ __volatile__(
-
/* btcl: copy the selected bit into CF, then complement it.
   sbbl %0,%0: oldbit = oldbit - oldbit - CF, i.e. 0 or -1. */
"btcl %2,%1\n\tsbbl %0,%0"
-
/* outputs: %0 = oldbit (any register), %1 = the memory word */
:"=r" (oldbit),"=m" (ADDR)
-
/* input: %2 = bit number, as an immediate or in a register */
:"Ir" (nr) : "memory");
-
return oldbit;
-
}
btcl %2,%1 -->%1是addr指向的内存, %2是nr
先把%1对应的内存地址的第%2位的值复制到EFLAGS寄存器的进位标志CF,然后把该位取反
接着sbbl %0,%0计算 oldbit = oldbit - oldbit - CF = -CF
原先addr的nr位是1,则返回-1(所有位都是1,非0即为真),并将nr位反转
原先addr的nr位是0,则返回0,并将nr位反转
阅读(1997) | 评论(0) | 转发(0) |