Chinaunix首页 | 论坛 | 博客
  • 博客访问: 2159585
  • 博文数量: 438
  • 博客积分: 3871
  • 博客等级: 中校
  • 技术积分: 6075
  • 用 户 组: 普通用户
  • 注册时间: 2011-09-10 00:11
个人简介

邮箱: wangcong02345@163.com

文章分类

全部博文(438)

文章存档

2017年(15)

2016年(119)

2015年(91)

2014年(62)

2013年(56)

2012年(79)

2011年(16)

分类: LINUX

2016-12-01 10:09:35

一.总体说明

二.代码分析
以下以vmalloc(0x6400000); //100M 进行说明

2.1 在include/linux/vmalloc.h中
  1. //从ZONE_HIGH与ZONE_NORMAL中分配内存
  2. static inline void * vmalloc (unsigned long size)
  3. {
  4.     return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); 
  5. }

  6. //从ZONE_DMA中分配内存
  7. static inline void * vmalloc_dma (unsigned long size)
  8. {
  9.     return __vmalloc(size, GFP_KERNEL|GFP_DMA, PAGE_KERNEL);
  10. }

  11. //从ZONE_NORMAL中分配内存 
  12. static inline void * vmalloc_32(unsigned long size)
  13. {
  14.     return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
  15. }

2.2 在mm/vmalloc.c中 L229
  1. void * __vmalloc (unsigned long size, int gfp_mask, pgprot_t prot)
  2. {
  3.     void * addr;
  4.     struct vm_struct *area;
  5. //页对齐,不足一页按一页来计算,例执行前size=10,执行后size=4096
  6.     size = PAGE_ALIGN(size);
  7. //并判断size是否超过了物理内存的大小,若超出,则退出       
  8.     if (!size || (size >> PAGE_SHIFT) > num_physpages) {
  9.         BUG();
  10.         return NULL;
  11.     }
  12.     area = get_vm_area(size, VM_ALLOC);    //2.3
  13.     if (!area)
  14.         return NULL;
  15.     addr = area->addr;     //这个addr是从0xF8800000=3976M开始的,所以vmalloc最大能分配120M
  16.     if (vmalloc_area_pages(VMALLOC_VMADDR(addr), size, gfp_mask, prot)) { //2.4
  17.         vfree(addr);
  18.         return NULL;
  19.     }
  20.     return addr;
  21. }
2.3 初始化area
  1. struct vm_struct * get_vm_area(unsigned long size, unsigned long flags)
  2. {
  3.     unsigned long addr;
  4.     struct vm_struct **p, *tmp, *area;
  5.     //用slab申请area的内存
  6.     area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);  //执行后area=0xc210c180,物理地址=33.047M处
  7.     if (!area)
  8.         return NULL;
  9.     size += PAGE_SIZE;
  10.     addr = VMALLOC_START;         //0xf8800000=3976M,离4G只有120M
  11.     write_lock(&vmlist_lock);
  12.     for (p = &vmlist; (tmp = *p) ; p = &tmp->next) {
  13.         if ((size + addr) < addr)
  14.             goto out;
  15.         if (size + addr <= (unsigned long) tmp->addr)
  16.             break;
  17.         addr = tmp->size + (unsigned long) tmp->addr;
  18.         if (addr > VMALLOC_END-size)
  19.             goto out;
  20.     }
  21.     //初始化area
  22.     area->flags = flags;
  23.     area->addr = (void *)addr;
  24.     area->size = size;
  25.     area->next = *p;
  26.     *p = area;
  27.     write_unlock(&vmlist_lock);
  28.     return area;

  29. out:
  30.     write_unlock(&vmlist_lock);
  31.     kfree(area);
  32.     return NULL;
  33. }
2.4 在mm/vmalloc.c中
  1. //address=0xf8800000, size=0x1000,gfp_mask=0x1f2, prot=0x163
  2. inline int vmalloc_area_pages (unsigned long address, unsigned long size, int gfp_mask, pgprot_t prot)
  3. {
  4.     pgd_t * dir;
  5.     unsigned long end = address + size;   //end最大值是0xFFFFFFFF,unsigned long的取值范围超过4G就再从0开始
  6.     int ret;                              //如果end超过了4G,则address
  7. //pgd=0xc0101000, index=(addr>>22)*4=(addr>>20)=0xF88,所以执行后dir=0xc0101f88
  8.     dir = pgd_offset_k(address);   
  9.     spin_lock(&init_mm.page_table_lock);
  10.     do {
  11.         pmd_t *pmd;
  12.         //在pgtable-2level.h中函数pmd_alloc没有作用,执行后pmd=dir   
  13.         pmd = pmd_alloc(&init_mm, dir, address);   //2.4.1 
  14.         ret = -ENOMEM;
  15.         if (!pmd)
  16.             break;

  17.         ret = -ENOMEM;
  18.         if (alloc_area_pmd(pmd, address, end - address, gfp_mask, prot)) //2.5 映射4M的内存
  19.             break;

  20.         address = (address + PGDIR_SIZE) & PGDIR_MASK;
  21.         dir++;

  22.         ret = 0;
  23.     } while (address && (address < end));     //循环操作,直到end,最大能映射120M
  24.     spin_unlock(&init_mm.page_table_lock);
  25.     flush_cache_all();
  26.     return ret;
  27. }

2.4.1 在include/linux/mm.h中L433
  1. static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
  2. {
  3.     if (pgd_none(*pgd))   //在pgtable-2level.h中这个是0
  4.         return __pmd_alloc(mm, pgd, address);
  5.     return pmd_offset(pgd, address);     //这个函数在pgtable-2level.h中直接将pgd返回了
  6. }
2.5 映射4M内存
  1. static inline int alloc_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, int gfp_mask, pgprot_t prot)
  2. {
  3.     unsigned long end;

  4.     address &= ~PGDIR_MASK;
  5.     end = address + size;
  6.     if (end > PGDIR_SIZE)    //PGDIR_SIZE=4M,1个页目录表项可以映射4M内存
  7.         end = PGDIR_SIZE;
  8.     do {
  9.     //申请一页内存作为页表,并用页表基地址更新页目录表项pmd
  10.         pte_t * pte = pte_alloc(&init_mm, pmd, address);    //2.5.1 0xc211b000
  11.         if (!pte)
  12.             return -ENOMEM;
  13.     //映射一个页表(1024项)-->申请1024次内存,并用每次申请到内存的基地址去更新页表项(2.5.1申请的)
  14.         if (alloc_area_pte(pte, address, end - address, gfp_mask, prot))  //2.5.2
  15.             return -ENOMEM;
  16.         address = (address + PMD_SIZE) & PMD_MASK;
  17.         pmd++;
  18.     } while (address < end);    //注意: 这个循环只执行一次
  19.     return 0;
  20. }
2.5.1 申请一页内存作为页表,并用页表基地址更新页目录表项pmd
  1. pte_t *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
  2. {
  3.     if (pmd_none(*pmd)) {    //!pmd_val(x),因为pmd->pmd=0,所以这儿非一下,if就成立了
  4.         pte_t *new;

  5. //申请一页内存并清0
  6.        new = pte_alloc_one_fast(mm, address);    //用到了pte_quicklist,执行后new=NULL
  7.         if (!new) {
  8.             spin_unlock(&mm->page_table_lock);
  9.             new = pte_alloc_one(mm, address);    //2.6真正是在这儿申请了一页内存,并清0的
  10.             spin_lock(&mm->page_table_lock);
  11.             if (!new)
  12.                 return NULL;

  13.             if (!pmd_none(*pmd)) {
  14.                 pte_free(new);
  15.                 goto out;
  16.             }
  17.         }
  18. //更新页目录表项-->用刚申请的内存去更新page_dir_entry
  19. //更新后page_dir_entry=0x67=(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
  20.         pmd_populate(mm, pmd, new);   //这儿的pmd就是pde=0xc0101f88, new=0x0211b000
  21.     }
  22. out:
  23.     return pte_offset(pmd, address); //将申请到的页表项地址(虚拟地址)返回 0xc211b000
  24. }
#define pmd_populate(mm, pmd, pte)  set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
pmd_populate执行前后PDE的改变
  1. (gdb) x /4wx 0xc0101f80
  2. 0xc0101f80 <swapper_pg_dir+3968>:    0x00000000    0x00000000    0x00000000    0x00000000

  3. (gdb) x /4wx 0xc0101f80
  4. 0xc0101f80 <swapper_pg_dir+3968>:    0x00000000    0x00000000    0x0211b067    0x00000000
2.6 在include/asm/pgalloc.h中 L107
  1. static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
  2. {
  3.     pte_t *pte;

  4.     pte = (pte_t *) __get_free_page(GFP_KERNEL);   //获取一页内存
  5.     if (pte)
  6.         clear_page(pte);     //memset((void *)(page), 0, PAGE_SIZE)将内存清0
  7.     return pte;
  8. }
2.5.2 映射一个页表(1024项) --> 申请1024次内存,并用每次申请到内存的基地址去更新页表项(2.5.1申请的)
  1. static inline int alloc_area_pte (pte_t * pte, unsigned long address, unsigned long size, int gfp_mask, pgprot_t prot)
  2. {
  3.     unsigned long end;

  4.     address &= ~PMD_MASK;   //address=0x0
  5.     end = address + size;
  6.     if (end > PMD_SIZE)
  7.         end = PMD_SIZE;
  8. //申请一页内存,并将这一页内存映射到页表项中,一直循环直到1024个页表项全部映射完成
  9.     do {
  10.         struct page * page;
  11.         spin_unlock(&init_mm.page_table_lock);
  12.         page = alloc_page(gfp_mask);           //申请一页内存
  13.         spin_lock(&init_mm.page_table_lock);
  14.         if (!page)
  15.             return -ENOMEM;
  16.         set_pte(pte, mk_pte(page, prot));     //将刚申请的这一页内存映射到页表项中
  17.         address += PAGE_SIZE;
  18.         pte++;
  19.     } while (address < end);   //循环共执行1024次
  20.     return 0;
  21. }
第一次执行前后页表项的变化

  1. (gdb) p /x pte
  2. $23 = 0xc211b000
  3. (gdb) x /4wx pte
  4. 0xc211b000:    0x00000000    0x00000000    0x00000000    0x00000000

  5. (gdb) x /4wx pte
  6. 0xc211b000:    0x3fffd163    0x00000000    0x00000000    0x00000000
整个while循环完成后页表项的变化
  1. (gdb) x /1028wx 0xc211b000
  2. 0xc211b000:    0x3fffd163    0x3fffc163    0x3fffb163    0x3fffa163    -->从0x3FFFd000从物理内存的最高端开始映射
  3. 0xc211b010:    0x3fff9163    0x3fff8163    0x3fff7163    0x3fff6163
  4. ...
  5. 0xc211bff0:    0x3fc01163    0x3fc00163    0x3fbff163    0x3fbfe163     -->一共1024个页表项
  6. 0xc211c000:    0xc210e278    0xc210e278    0x00000100    0xc211c100


附: vmalloc(100M)时页目录表的变化
a. vmalloc之前
  1. (gdb) x /128wx 0xc0101f00
  2. 0xc0101f00 <swapper_pg_dir+3840>:    0x300001e3    0x304001e3    0x308001e3    0x30c001e3
  3. 0xc0101f10 <swapper_pg_dir+3856>:    0x310001e3    0x314001e3    0x318001e3    0x31c001e3
  4. 0xc0101f20 <swapper_pg_dir+3872>:    0x320001e3    0x324001e3    0x328001e3    0x32c001e3
  5. 0xc0101f30 <swapper_pg_dir+3888>:    0x330001e3    0x334001e3    0x338001e3    0x33c001e3
  6. 0xc0101f40 <swapper_pg_dir+3904>:    0x340001e3    0x344001e3    0x348001e3    0x34c001e3
  7. 0xc0101f50 <swapper_pg_dir+3920>:    0x350001e3    0x354001e3    0x358001e3    0x35c001e3
  8. 0xc0101f60 <swapper_pg_dir+3936>:    0x360001e3    0x364001e3    0x368001e3    0x36c001e3

  9. 0xc0101f70 <swapper_pg_dir+3952>:    0x370001e3    0x374001e3    0x378001e3    0x37c001e3
  10. 0xc0101f80 <swapper_pg_dir+3968>:    0x00000000    0x00000000    0x00000000    0x00000000   -->绿色部分是高端内存的映射区
  11. 0xc0101f90 <swapper_pg_dir+3984>:    0x00000000    0x00000000    0x00000000    0x00000000
  12. 0xc0101fa0 <swapper_pg_dir+4000>:    0x00000000    0x00000000    0x00000000    0x00000000
  13. 0xc0101fb0 <swapper_pg_dir+4016>:    0x00000000    0x00000000    0x00000000    0x00000000
  14. 0xc0101fc0 <swapper_pg_dir+4032>:    0x00000000    0x00000000    0x00000000    0x00000000
  15. 0xc0101fd0 <swapper_pg_dir+4048>:    0x00000000    0x00000000    0x00000000    0x00000000
  16. 0xc0101fe0 <swapper_pg_dir+4064>:    0x00004063    0x00000000    0x00000000    0x00000000
  17. 0xc0101ff0 <swapper_pg_dir+4080>:    0x00000000    0x00000000    0x00000000    0x00003063
  18. 0xc0102000 <pg0>:                    0x00000027    0x00001007    0x00002067    0x00003007
b. vmalloc之后
  1. (gdb) x /40wx 0xc0101f80
  2. 0xc0101f80 <swapper_pg_dir+3968>:    0x00000000    0x00000000    0x0211b067    0x0211a067      -->从0xF88开始
  3. 0xc0101f90 <swapper_pg_dir+3984>:    0x02119067    0x02118067    0x02117067    0x02116067
  4. 0xc0101fa0 <swapper_pg_dir+4000>:    0x02115067    0x02114067    0x02113067    0x02112067
  5. 0xc0101fb0 <swapper_pg_dir+4016>:    0x02111067    0x02110067    0x37dff067    0x37dfe067
  6. 0xc0101fc0 <swapper_pg_dir+4032>:    0x37dfd067    0x37dfc067    0x37dfb067    0x37dfa067
  7. 0xc0101fd0 <swapper_pg_dir+4048>:    0x37df9067    0x37df8067    0x37df7067    0x37df6067
  8. 0xc0101fe0 <swapper_pg_dir+4064>:    0x00004063    0x37df5067    0x37df4067    0x00000000       -->这儿竟然没有改变0x00004063
  9. 0xc0101ff0 <swapper_pg_dir+4080>:    0x00000000    0x00000000    0x00000000    0x00003063
  10. 0xc0102000 <pg0>:                    0x00000027    0x00001007    0x00002067    0x00003007
附录2. __vmalloc中的标志在什么地方起作用?
vmalloc-->__vmalloc -->调用__vmalloc时通过gfp_mask传入了__GFP_HIGHMEM、GFP_KERNEL、GFP_DMA等标志
a. 起作用是在alloc_page中,调用顺序是
vmalloc_area_pages-->alloc_area_pmd-->alloc_area_pte-->alloc_page
b. 下方gfp_mask就是标志
  1. #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
b.1 
  1. 在include/linux/mm.h中L361
  2. static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order)
  3. {
  4.     if (order >= MAX_ORDER)
  5.         return NULL;
  6.     return _alloc_pages(gfp_mask, order);
  7. }
b.2 重点来了
  1. 在mm/page_alloc.c中L227
  2. #ifndef CONFIG_DISCONTIGMEM
  3. struct page *_alloc_pages(unsigned int gfp_mask, unsigned int order)
  4. {
  5.     return __alloc_pages(gfp_mask, order, contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK));
  6. }
  7. #endif
contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK) 就是这儿
contig_page_data.node_zonelists是一个数组,里面装着三个zone--> zone_DMA zone_normal zone_high
如果gfp_mask中设置了__GFP_HIGHMEM,则到contig_page_data.node_zonelists[2]中去找page
在前面初始化中可以知道,contig_page_data.node_zonelists[2]中一定是分配了高端内存


阅读(1933) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~