一.总体说明
二.代码分析
以下以vmalloc(0x6400000); //100M 进行说明
2.1 在include/linux/vmalloc.h中
-
//从ZONE_HIGH与ZONE_NORMAL中分配内存
-
static inline void * vmalloc (unsigned long size)
-
{
-
return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
-
}
-
-
//从ZONE_DMA中分配内存
-
static inline void * vmalloc_dma (unsigned long size)
-
{
-
return __vmalloc(size, GFP_KERNEL|GFP_DMA, PAGE_KERNEL);
-
}
-
-
//从ZONE_NORMAL中分配内存
-
static inline void * vmalloc_32(unsigned long size)
-
{
-
return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
-
}
2.2 在mm/vmalloc.c中 L229
-
/*
 * __vmalloc - reserve a vmalloc address range and back it with pages.
 * @size:     requested length in bytes; rounded up to a whole number of pages
 * @gfp_mask: allocation flags forwarded to the page allocator
 * @prot:     page protection used for the new mappings
 *
 * Returns the start address of the mapped range, or NULL on failure.
 */
void * __vmalloc (unsigned long size, int gfp_mask, pgprot_t prot)
{
	struct vm_struct *vm;
	void *base;

	// Round up to a page multiple: e.g. size=10 on entry becomes 4096.
	size = PAGE_ALIGN(size);

	// A zero-length request, or one larger than all physical memory, is a
	// caller bug.
	if (size == 0 || (size >> PAGE_SHIFT) > num_physpages) {
		BUG();
		return NULL;
	}

	vm = get_vm_area(size, VM_ALLOC);	// see 2.3
	if (vm == NULL)
		return NULL;

	// In the traced run the range starts at 0xF8800000 (3976M), which is why
	// vmalloc can hand out at most about 120M there.
	base = vm->addr;

	// see 2.4; on failure vfree() undoes what was set up so far
	if (vmalloc_area_pages(VMALLOC_VMADDR(base), size, gfp_mask, prot) != 0) {
		vfree(base);
		return NULL;
	}

	return base;
}
2.3 初始化area
-
struct vm_struct * get_vm_area(unsigned long size, unsigned long flags)
-
{
-
unsigned long addr;
-
struct vm_struct **p, *tmp, *area;
-
//用slab申请area的内存
-
area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL); //执行后area=0xc210c180,物理地址=33.047M处
-
if (!area)
-
return NULL;
-
size += PAGE_SIZE;
-
addr = VMALLOC_START; //0xf8800000=3976M,离4G只有120M
-
write_lock(&vmlist_lock);
-
for (p = &vmlist; (tmp = *p) ; p = &tmp->next) {
-
if ((size + addr) < addr)
-
goto out;
-
if (size + addr <= (unsigned long) tmp->addr)
-
break;
-
addr = tmp->size + (unsigned long) tmp->addr;
-
if (addr > VMALLOC_END-size)
-
goto out;
-
}
-
//初始化area
-
area->flags = flags;
-
area->addr = (void *)addr;
-
area->size = size;
-
area->next = *p;
-
*p = area;
-
write_unlock(&vmlist_lock);
-
return area;
-
-
out:
-
write_unlock(&vmlist_lock);
-
kfree(area);
-
return NULL;
-
}
2.4 在mm/vmalloc.c中
-
//address=0xf8800000, size=0x1000,gfp_mask=0x1f2, prot=0x163
-
inline int vmalloc_area_pages (unsigned long address, unsigned long size, int gfp_mask, pgprot_t prot)
-
{
-
pgd_t * dir;
-
unsigned long end = address + size; //end最大值是0xFFFFFFFF,unsigned long的取值范围超过4G就再从0开始
-
int ret; //如果end超过了4G,则address
-
//pgd=0xc0101000, index=(addr>>22)*4=(addr>>20)=0xF88,所以执行后dir=0xc0101f88
-
dir = pgd_offset_k(address);
-
spin_lock(&init_mm.page_table_lock);
-
do {
-
pmd_t *pmd;
-
//在pgtable-2level.h中函数pmd_alloc没有作用,执行后pmd=dir
-
pmd = pmd_alloc(&init_mm, dir, address); //2.4.1
-
ret = -ENOMEM;
-
if (!pmd)
-
break;
-
-
ret = -ENOMEM;
-
if (alloc_area_pmd(pmd, address, end - address, gfp_mask, prot)) //2.5 映射4M的内存
-
break;
-
-
address = (address + PGDIR_SIZE) & PGDIR_MASK;
-
dir++;
-
-
ret = 0;
-
} while (address && (address < end)); //循环操作,直到end,最大能映射120M
-
spin_unlock(&init_mm.page_table_lock);
-
flush_cache_all();
-
return ret;
-
}
2.4.1 在include/linux/mm.h中L433
-
/*
 * pmd_alloc - return the pmd entry for `address` under `pgd`, allocating
 * through __pmd_alloc() when the pgd entry is empty.
 *
 * Per the article: in pgtable-2level.h pgd_none() is always 0 and
 * pmd_offset() returns the pgd itself, so in that configuration this
 * returns `pgd` unchanged.
 */
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
	if (!pgd_none(*pgd))
		return pmd_offset(pgd, address);

	return __pmd_alloc(mm, pgd, address);
}
2.5 映射4M内存
-
/*
 * alloc_area_pmd - map the part of a range that lies under one page-directory
 * entry.
 * @pmd:      first pmd entry to fill
 * @address:  virtual start address; only its offset inside the directory
 *            entry is used
 * @size:     bytes still to map; clamped to the end of this directory entry
 * @gfp_mask: allocation flags for the backing pages
 * @prot:     page protection for the new mappings
 *
 * Returns 0 on success, -ENOMEM if a page table or a data page cannot be
 * allocated.
 */
static inline int alloc_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, int gfp_mask, pgprot_t prot)
{
	unsigned long offset = address & ~PGDIR_MASK;
	unsigned long limit = offset + size;

	// PGDIR_SIZE is 4M here: one page-directory entry maps 4M of memory.
	if (limit > PGDIR_SIZE)
		limit = PGDIR_SIZE;

	for (;;) {
		// see 2.5.1: gets one page to act as the page table and points the
		// directory entry at it (0xc211b000 in the traced run)
		pte_t *table = pte_alloc(&init_mm, pmd, offset);

		// see 2.5.2: fills the page table, allocating one page per entry
		// (1024 entries for a full table)
		if (!table || alloc_area_pte(table, offset, limit - offset, gfp_mask, prot))
			return -ENOMEM;

		offset = (offset + PMD_SIZE) & PMD_MASK;
		pmd++;

		// Note from the article: this loop body executes only once.
		if (offset >= limit)
			return 0;
	}
}
-
2.5.1 申请一页内存作为页表,并用页表基地址更新页目录表项pmd
-
/*
 * pte_alloc - return the pte for `address` under `pmd`, creating the page
 * table first if the pmd entry is empty.
 * @mm:      address space that owns the page tables
 * @pmd:     page-directory (pmd) entry to look under
 * @address: virtual address whose pte is wanted
 *
 * mm->page_table_lock is released and retaken around the slow allocation,
 * so the caller is expected to hold it on entry.
 * Returns the pte pointer, or NULL if no page table could be allocated.
 *
 * Traced run: the entry at 0xc0101f88 was empty, the quick list gave
 * nothing, and the new page table was at virtual 0xc211b000
 * (physical 0x0211b000).
 */
pte_t *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
{
	pte_t *table;

	// pmd_none() is !pmd_val(x); an entry holding 0 counts as empty.
	if (!pmd_none(*pmd))
		return pte_offset(pmd, address);

	// Try pte_quicklist first (this returned NULL in the traced run).
	table = pte_alloc_one_fast(mm, address);
	if (!table) {
		// see 2.6: this is where a page is really allocated and zeroed
		spin_unlock(&mm->page_table_lock);
		table = pte_alloc_one(mm, address);
		spin_lock(&mm->page_table_lock);
		if (!table)
			return NULL;

		// The entry may have been filled while the lock was dropped; if so,
		// give the fresh page back and use the existing table.
		if (!pmd_none(*pmd)) {
			pte_free(table);
			return pte_offset(pmd, address);
		}
	}

	// Point the directory entry at the new page table. The low bits become
	// 0x67 = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY.
	pmd_populate(mm, pmd, table);

	return pte_offset(pmd, address);
}
// Point a pmd entry at a page table: the stored value is __pa(pte) plus the _PAGE_TABLE flag bits. `mm` is not used.
#define pmd_populate(mm, pmd, pte) set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
// Store `pmdval` into the entry that `pmdptr` points to.
#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
pmd_populate执行前后PDE的改变
-
(gdb) x /4wx 0xc0101f80
-
0xc0101f80 <swapper_pg_dir+3968>: 0x00000000 0x00000000 0x00000000 0x00000000
-
-
(gdb) x /4wx 0xc0101f80
-
0xc0101f80 <swapper_pg_dir+3968>: 0x00000000 0x00000000 0x0211b067 0x00000000
2.6 在include/asm/pgalloc.h中 L107
-
static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
-
{
-
pte_t *pte;
-
-
pte = (pte_t *) __get_free_page(GFP_KERNEL); //获取一页内存
-
if (pte)
-
clear_page(pte); //memset((void *)(page), 0, PAGE_SIZE)将内存清0
-
return pte;
-
}
2.5.2 映射一个页表(1024项) --> 申请1024次内存,并用每次申请到内存的基地址去更新页表项(2.5.1申请的)
-
/*
 * alloc_area_pte - back a run of page-table entries with newly allocated pages.
 * @pte:      first page-table entry to fill
 * @address:  virtual start address; only its offset inside the page table's
 *            span is used (0x0 in the traced run)
 * @size:     bytes still to map; clamped to the end of this page table
 * @gfp_mask: flags passed to alloc_page()
 * @prot:     page protection for each new mapping
 *
 * init_mm.page_table_lock is dropped around every alloc_page() call and
 * retaken before the entry is written.
 * Returns 0 on success, -ENOMEM as soon as a page allocation fails.
 */
static inline int alloc_area_pte (pte_t * pte, unsigned long address, unsigned long size, int gfp_mask, pgprot_t prot)
{
	unsigned long offset = address & ~PMD_MASK;
	unsigned long limit = offset + size;

	if (limit > PMD_SIZE)
		limit = PMD_SIZE;

	// One page per entry; a range covering the whole table takes 1024 rounds.
	for (;;) {
		struct page *page;

		spin_unlock(&init_mm.page_table_lock);
		page = alloc_page(gfp_mask);	// allocate one page
		spin_lock(&init_mm.page_table_lock);
		if (!page)
			return -ENOMEM;

		// Map the new page through this page-table entry.
		set_pte(pte, mk_pte(page, prot));
		pte++;

		offset += PAGE_SIZE;
		if (offset >= limit)
			return 0;
	}
}
第一次执行前后页表项的变化
-
(gdb) p /x pte
-
$23 = 0xc211b000
-
(gdb) x /4wx pte
-
0xc211b000: 0x00000000 0x00000000 0x00000000 0x00000000
-
-
(gdb) x /4wx pte
-
0xc211b000: 0x3fffd163 0x00000000 0x00000000 0x00000000
整个while循环完成后页表项的变化
-
x /1028wx 0xc211b000
-
0xc211b000: 0x3fffd163 0x3fffc163 0x3fffb163 0x3fffa163 -->从0x3fffd000开始,即从物理内存的最高端向下映射
-
0xc211b010: 0x3fff9163 0x3fff8163 0x3fff7163 0x3fff6163
-
...
-
0xc211bff0: 0x3fc01163 0x3fc00163 0x3fbff163 0x3fbfe163 -->一共1024个页表项
-
0xc211c000: 0xc210e278 0xc210e278 0x00000100 0xc211c100
附: vmalloc(100M)时页目录表的变化
a. vmalloc之前
-
(gdb) x /128wx 0xc0101f00
-
0xc0101f00 <swapper_pg_dir+3840>: 0x300001e3 0x304001e3 0x308001e3 0x30c001e3
-
0xc0101f10 <swapper_pg_dir+3856>: 0x310001e3 0x314001e3 0x318001e3 0x31c001e3
-
0xc0101f20 <swapper_pg_dir+3872>: 0x320001e3 0x324001e3 0x328001e3 0x32c001e3
-
0xc0101f30 <swapper_pg_dir+3888>: 0x330001e3 0x334001e3 0x338001e3 0x33c001e3
-
0xc0101f40 <swapper_pg_dir+3904>: 0x340001e3 0x344001e3 0x348001e3 0x34c001e3
-
0xc0101f50 <swapper_pg_dir+3920>: 0x350001e3 0x354001e3 0x358001e3 0x35c001e3
-
0xc0101f60 <swapper_pg_dir+3936>: 0x360001e3 0x364001e3 0x368001e3 0x36c001e3
-
-
0xc0101f70 <swapper_pg_dir+3952>: 0x370001e3 0x374001e3 0x378001e3 0x37c001e3
-
0xc0101f80 <swapper_pg_dir+3968>: 0x00000000 0x00000000 0x00000000 0x00000000 -->绿色部分是高端内存的映射区
-
0xc0101f90 <swapper_pg_dir+3984>: 0x00000000 0x00000000 0x00000000 0x00000000
-
0xc0101fa0 <swapper_pg_dir+4000>: 0x00000000 0x00000000 0x00000000 0x00000000
-
0xc0101fb0 <swapper_pg_dir+4016>: 0x00000000 0x00000000 0x00000000 0x00000000
-
0xc0101fc0 <swapper_pg_dir+4032>: 0x00000000 0x00000000 0x00000000 0x00000000
-
0xc0101fd0 <swapper_pg_dir+4048>: 0x00000000 0x00000000 0x00000000 0x00000000
-
0xc0101fe0 <swapper_pg_dir+4064>: 0x00004063 0x00000000 0x00000000 0x00000000
-
0xc0101ff0 <swapper_pg_dir+4080>: 0x00000000 0x00000000 0x00000000 0x00003063
-
0xc0102000 <pg0>: 0x00000027 0x00001007 0x00002067 0x00003007
b. vmalloc之后
-
x /40wx 0xc0101f80
-
0xc0101f80 <swapper_pg_dir+3968>: 0x00000000 0x00000000 0x0211b067 0x0211a067 -->从0xF88开始
-
0xc0101f90 <swapper_pg_dir+3984>: 0x02119067 0x02118067 0x02117067 0x02116067
-
0xc0101fa0 <swapper_pg_dir+4000>: 0x02115067 0x02114067 0x02113067 0x02112067
-
0xc0101fb0 <swapper_pg_dir+4016>: 0x02111067 0x02110067 0x37dff067 0x37dfe067
-
0xc0101fc0 <swapper_pg_dir+4032>: 0x37dfd067 0x37dfc067 0x37dfb067 0x37dfa067
-
0xc0101fd0 <swapper_pg_dir+4048>: 0x37df9067 0x37df8067 0x37df7067 0x37df6067
-
0xc0101fe0 <swapper_pg_dir+4064>: 0x00004063 0x37df5067 0x37df4067 0x00000000 -->这儿的0x00004063竟然没有改变
-
0xc0101ff0 <swapper_pg_dir+4080>: 0x00000000 0x00000000 0x00000000 0x00003063
-
0xc0102000 <pg0>: 0x00000027 0x00001007 0x00002067 0x00003007
附录2. __vmalloc中的标志在什么地方起作用?
vmalloc-->__vmalloc --> 调用__vmalloc时,通过gfp_mask参数传入了GFP_KERNEL、__GFP_HIGHMEM、GFP_DMA等标志
a. 起作用是在alloc_page中,调用顺序是
vmalloc_area_pages-->alloc_area_pmd-->alloc_area_pte-->alloc_page
b. 下方gfp_mask就是标志
-
// Single-page form: alloc_pages() called with order 0.
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
b.1
-
在include/linux/mm.h中L361
-
static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order)
-
{
-
if (order >= MAX_ORDER)
-
return NULL;
-
return _alloc_pages(gfp_mask, order);
-
}
b.2 重点来了
-
在mm/page_alloc.c中L227
-
#ifndef CONFIG_DISCONTIGMEM
-
struct page *_alloc_pages(unsigned int gfp_mask, unsigned int order)
-
{
-
return __alloc_pages(gfp_mask, order, contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK));
-
}
-
#endif
contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK) 就是这儿
contig_page_data.node_zonelists是一个数组,里面装着三个zone--> zone_DMA zone_normal zone_high
如果gfp_mask中设置了__GFP_HIGHMEM,则到contig_page_data.node_zonelists[2]中去找page
在前面初始化中可以知道,contig_page_data.node_zonelists[2]中一定是分配了高端内存
阅读(1933) | 评论(0) | 转发(0) |