Chinaunix首页 | 论坛 | 博客
  • 博客访问: 55283
  • 博文数量: 3
  • 博客积分: 605
  • 博客等级: 一等列兵
  • 技术积分: 110
  • 用 户 组: 普通用户
  • 注册时间: 2011-05-03 20:14
个人简介

打算换工作了,求职北京地区内核职位..

文章分类
文章存档

2014年(2)

2013年(1)

分类: BSD

2014-06-06 14:16:26

BSD amd64 内存管理分析 (freebsd9.0) by chishanmingshen
http://chishanmingshen.blog.chinaunix.net
第一部分 基本流程
elf64_exec(struct preloaded_file *fp)
第一次设置页表:2M*512=1G空间的映射.
__exec((void *)VTOP(amd64_tramp), modulep, kernend);

amd64_tramp:
设置cr3.打开分页机制.此时是32bit模式
跳到64bit模式.(之前的entry_hi/entry_lo即btext地址)
ljmp $0x8, $VTOP(longmode)


locore.S
call hammer_time(其中会调用getmemsize(kmdp, physfree)->pmap_bootstrap()->create_pagetable().)
call mi_startup(module init)


0.pmap_bootstrap(vm_paddr_t *firstaddr)
create_pagetables(firstaddr)
virtual_avail = (vm_offset_t) KERNBASE + *firstaddr;
#define KERNBASE KVADDR(KPML4I, KPDPI, 0, 0)/(511,510,0,0)最后1G空间
virtual_end = VM_MAX_KERNEL_ADDRESS;
#define VM_MAX_KERNEL_ADDRESS KVADDR(KPML4I, NPDPEPG-1, NKPDE-1, NPTEPG-1)/*511,510,511*/最后留了2M
/*kernel_pmap记录PML4表基址的虚拟地址,从物理地址KPML4.*/
kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + KPML4phys);


1. SYS_INIT's vm_mem_init()
vm_set_page_size
virtual_avail = vm_page_startup(virtual_avail);/*初始化各个物理页面,然后加入到freelist中*/
遍历phys_avail[],得到段数:nblocks,总的空间大小:total.
vm_pageq_init()/*page queue*/
扣去umaslb得到new_end,将umaslb调pmap_map和uma_startup.(支持Dmap,所以不递增vaddr.)
#define PHYS_TO_DMAP(x) ((x) | DMAP_MIN_ADDRESS)
#define DMAP_MIN_ADDRESS KVADDR(DMPML4I, 0, 0, 0)/*510*/
计算可用物理页面总数为page_range个.
npages = (total - (page_range * sizeof(struct vm_page)) - (end - new_end)) / PAGE_SIZE;
vm_page_array指向pmap_map()映射后的vm_page[]空间,共npages个页面
phys_avail[biggestone + 1] = new_end;最后一段内存更正为到new_end结束,扣除了vm_page[].
vm_page_array_size = page_range;
vm_phys_init();/*初始化物理内存分配器*/
对所有段调用vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],VM_FREELIST_DEFAULT);
更新到vm_phys_segs[]中.
vm_phys_free_queues[vm_nfreelists][VM_NFREEPOOL]
/*static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;*/
遍历phys_avail[],对所有物理页调用vm_phys_add_page(pa).
vm_phys_add_page(pa/*vm_paddr 物理地址*/):初始化一个物理页面,同时将它加到free list中.
m = vm_phys_paddr_to_vm_page(vm_paddr_t pa):/*找到给定物理地址对应的vm_page*/
遍历vm_phys_segs[],找到对应的vm_page结构指针,并返回该指针.
return &(seg->first_page[atop(pa - seg->start)]);
pmap_page_init(m);
vm_phys_free_pages(m, 0);/*加到freelist中*/
return (vaddr);
/*最后将可以用的虚拟地址返回,其中vm_page[]的空间已经加进去了.
返回的virtual_avail,由外面使用,即普通物理页面空间*/


3.vm_object_init();
3.1
kernel_object_store (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)
3.2
kmem_object_store (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)
3.3
1个vm_object的zone: obj_zone


4.vm_map_startup();
创建1个vm_map的zone:mapzone callby vm_map_create()
创建2个vm_map_entry的zone :kmapentzone和mapentzone 根据vm_map生成vm_map_entry时,由vm_map的system_map决定.


5.kmem_init(virtual_avail, virtual_end);
vm_map_t m;
根据给定的物理地址范围,如kernel_pmap, 在虚拟空间min和max内, 生成一个vm_map m.
/*vm kernel_pmap:-2G->-4M*/
m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS/*-2G*/, end/*VM_MAX_KERNEL_ADDRESS*/);
vm_map_t result = uma_zalloc(mapzone, M_WAITOK);/*从mapzone中分一个vm_map*/
_vm_map_init(result, min, max);给vm_map赋值各个字段(例如,result->pmap = pmap)
kernel_map = m;/*内核总空间*/
(void) vm_map_insert(m, NULL, (vm_ooffset_t) 0, VM_MIN_KERNEL_ADDRESS, start, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
#define VM_MAX_KERNEL_ADDRESS KVADDR(KPML4I, NPDPEPG-1, NKPDE-1, NPTEPG-1) -2M
#define VM_MIN_KERNEL_ADDRESS KVADDR(KPML4I, KPDPI, 0, 0) -2G




6.pmap_init();
需要虚拟内存的模块可以调用了
建立一个管理255个页面(放置1M个页表项)的对象
kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);
对每个vm_page初始化pv_list//m = &vm_page_array[i];
每个vm_page都有一个struct pv_entry.这些pv_entry由pvinit带头.
pvinit = (struct pv_entry *) kmem_alloc(kernel_map,initial_pvs * sizeof (struct pv_entry));

7.vm_pager_init()
初始化已知的页
pagertab[]

第二部分 初始化
SYSINIT's KMEM module first kmeminit()
vm_kmem_size = 2*物理内存
kmem_map = kmem_suballoc(kernel_map, &kmembase, &kmemlimit,vm_kmem_size);申请kmem_map空间
   kmem_suballoc(parent/*kernel_map*/, min/*output*/, max/*output*/, size/*要分得的sub映射空间*/):
    *min = (vm_offset_t) vm_map_min(parent);
    *max = *min + size;
    result = vm_map_create(vm_map_pmap(parent), *min, *max);
    return result;/*将新分配的vm_map返回.即是kmem_map空间.*/
kmem_map->system_map = 1;
mt_zone = uma_zcreate("mt_zone", sizeof(struct malloc_type_internal),
静态数组kmemzones[]处理:根据kmemzones[indx].kz_zone申请uma_zone结构体(2^4,...,2^12).
uma_zcreate->uma_zalloc_internal返回zone
uma_zone_slab->slab_alloc->uma_small_alloc->kmem_malloc                  

SYSINIT's KMEM module second malloc_init(void *data)
struct malloc_type_internal *mtip = uma_zalloc(mt_zone, M_WAITOK | M_ZERO);

第三部分 申请内存
void *
malloc(unsigned long size, struct malloc_type *mtp, int flags)
1.小内存
indx = kmemsize[size >> KMEM_ZSHIFT];/*根据申请内存大小得到index*/
zone = kmemzones[indx].kz_zone; /*由index得到对应的zone*/
va = uma_zalloc(zone, flags);
2.大内存
uma_large_malloc(size, flags)
slab = uma_zalloc_internal(slabzone, NULL, wait);
mem = page_alloc(NULL, size, &flags, wait);
slab->us_data = mem;

static uma_slab_t
slab_zalloc(uma_zone_t zone, int wait)
mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
   &flags, wait);
   m = vm_page_alloc()得到vm_page.
    vm_phys_alloc_pages(pool, order);
    fl = vm_phys_free_queues[flind][pool];
    m = TAILQ_FIRST(&fl[oind].pl);
TAILQ_REMOVE(&fl[oind].pl, m, pageq);    
    vm_phys_split_pages(m, oind, fl, order);
    buddy算法,找比order稍大的vm_page
    1.如果对象非空, vm_page_insert(m, object, pindex);将vm_page加入到vm_object中.
    2.如果空对象, 则m->pindex = pindex;
    return m;
pa = m->phys_addr;
dump_add_page(pa);
va = (void *)PHYS_TO_DMAP(pa);/*内核申请的故放到DMAP*/    
/*#ifdef UMA_MD_SMALL_ALLOC
keg->uk_allocf = uma_small_alloc;/************?
#else
keg->uk_allocf = page_alloc;
#endif*/    

void *
uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
ret = ((void *)kmem_malloc(kmem_map, bytes, M_NOWAIT));
从指定的vm_map,即kmem_map中申请内存,大小为bytes.
return ret;


第四部分 页表相关
/*
 * Build the boot-time page tables: the kernel mapping (KPT/KPD/KPDP), the
 * direct map (DMPD/DMPDP), and the PML4 that ties them together.
 *
 * firstaddr is handed to allocpages() for every table allocation and is
 * then read as the upper bound of the physical range mapped for the
 * kernel.  NOTE(review): allocpages() presumably advances *firstaddr past
 * each allocation -- confirm against its definition.
 *
 * Side effects: assigns KPTphys, KPML4phys, KPDPphys, KPDphys, ndmpdp,
 * DMPDPphys, DMPDphys and dmaplimit, none of which are local to this
 * function.
 *
 * Index values are widened to vm_paddr_t before shifting so the shift is
 * never performed on a signed int.
 */
static void
create_pagetables(vm_paddr_t *firstaddr)
{
	int i;

	/* Allocate pages */
	KPTphys = allocpages(firstaddr, NKPT);
	KPML4phys = allocpages(firstaddr, 1);
	KPDPphys = allocpages(firstaddr, NKPML4E);
	KPDphys = allocpages(firstaddr, NKPDPE);

	/* Number of direct-map PDP entries needed to cover Maxmem pages. */
	ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT;
	if (ndmpdp < 4)		/* Minimum 4GB of dirmap */
		ndmpdp = 4;
	DMPDPphys = allocpages(firstaddr, NDMPML4E);
	DMPDphys = allocpages(firstaddr, ndmpdp);
	dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT;

	/* Fill in the underlying page table pages */
	/* Read-only from zero to physfree */
	/* NOTE(review): the entries below carry PG_RW despite that wording. */
	/* XXX not fully used, underneath 2M pages */
	/* Level 1: PT entries. */
	for (i = 0; ((vm_paddr_t)i << PAGE_SHIFT) < *firstaddr; i++) {
		((pt_entry_t *)KPTphys)[i] = (vm_paddr_t)i << PAGE_SHIFT;
		((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V | PG_G | PG_U;
	}

	/* Now map the page tables at their location within PTmap */
	/* Level 2: PD entries pointing at the PT pages. */
	for (i = 0; i < NKPT; i++) {
		((pd_entry_t *)KPDphys)[i] = KPTphys +
		    ((vm_paddr_t)i << PAGE_SHIFT);
		((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_U;
	}

	/* Map from zero to end of allocations under 2M pages */
	/* This replaces some of the KPTphys entries above */
	/* Level 2: PD entries with PG_PS (2M pages), bypassing level 1. */
	for (i = 0; ((vm_paddr_t)i << PDRSHIFT) < *firstaddr; i++) {
		((pd_entry_t *)KPDphys)[i] = (vm_paddr_t)i << PDRSHIFT;
		((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G | PG_U;
	}

	/* And connect up the PD to the PDP */
	/* Level 3: PDP entries, starting at slot KPDPI. */
	for (i = 0; i < NKPDPE; i++) {
		((pdp_entry_t *)KPDPphys)[i + KPDPI] = KPDphys +
		    ((vm_paddr_t)i << PAGE_SHIFT);
		((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U;
	}

	/* Now set up the direct map space using 2MB pages */
	/* Level 2 of the direct map: PG_PS entries, bypassing level 1. */
	for (i = 0; i < NPDEPG * ndmpdp; i++) {
		((pd_entry_t *)DMPDphys)[i] = (vm_paddr_t)i << PDRSHIFT;
		((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G | PG_U;
	}

	/* And the direct map space's PDP */
	/* Level 3 of the direct map. */
	for (i = 0; i < ndmpdp; i++) {
		((pdp_entry_t *)DMPDPphys)[i] = DMPDphys +
		    ((vm_paddr_t)i << PAGE_SHIFT);
		((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U;
	}

	/* And recursively map PML4 to itself in order to get PTmap */
	/* Level 4: PML4 slot PML4PML4I (annotated as 256), self-reference. */
	((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys;
	((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U;

	/* Connect the Direct Map slot up to the PML4 */
	/* PML4 slot DMPML4I (annotated as 510). */
	((pdp_entry_t *)KPML4phys)[DMPML4I] = DMPDPphys;
	((pdp_entry_t *)KPML4phys)[DMPML4I] |= PG_RW | PG_V | PG_U;

	/* Connect the KVA slot up to the PML4 */
	/* PML4 slot KPML4I (annotated as 511). */
	((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys;
	((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U;
}

uma调用和page_alloc:都实际从kmem_map子空间中分配空间.而不是kernel_map.
/*
 * Backing allocator that obtains memory from kmem_map.
 *
 * zone  - not referenced by this function.
 * bytes - size of the request, forwarded to kmem_malloc().
 * pflag - out parameter; always set to UMA_SLAB_KMEM.
 * wait  - flags forwarded unchanged to kmem_malloc().
 *
 * Returns whatever kmem_malloc() yields, converted to a pointer.
 */
static void *
page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
	/* Record where the memory came from before requesting it. */
	*pflag = UMA_SLAB_KMEM;

	return ((void *)kmem_malloc(kmem_map, bytes, wait));
}

vm_offset_t
kmem_malloc(map/*从哪个空间,比如kmem_map*/, size/*申请内存的大小*/, flags)
/*被uma_small_alloc()和page_alloc()调用.作用就是从kmem_map子空间中申请空间,大小为size.*/
addr = vm_map_findspace()来找出addr这个起始虚拟地址.
offset = addr - VM_MIN_KERNEL_ADDRESS;
vm_object_reference(kmem_object);
/*插入新的vm_map_entry_t,代表空间size大小*/
vm_map_insert(map, kmem_object, offset, addr, addr + size,VM_PROT_ALL, VM_PROT_ALL, 0);
/*逐页调用vm_page_alloc()来为每页生成vm_page结构体*/
vm_page_t m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i), pflags);
/**************NOW, call pmap_enter!!!***************/
pmap_enter(kernel_pmap, addr + i, m, VM_PROT_ALL, 1);
return addr;

#define OFF_TO_IDX(off) ((vm_pindex_t)(((vm_ooffset_t)(off)) >> PAGE_SHIFT))
_vm_object_allocate分配2个对象:kmem_object,kernel_object.空间是一样大的:
(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)


/*
 * Per-address-space physical map descriptor: holds the pointer to the
 * top-level (level 4) page table plus bookkeeping for the mappings that
 * belong to this pmap.
 */
struct pmap {
/* NOTE(review): presumably the lock guarding this pmap -- confirm. */
struct mtx pm_mtx;
pml4_entry_t *pm_pml4; /* KVA of level 4 page table */
TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
u_int pm_active; /* active on cpus */
/* spare u_int here due to padding */
struct pmap_statistics pm_stats; /* pmap statistics */
};

void
pmap_growkernel(vm_offset_t addr)
/*
 * Address of current and alternate address space page table maps
 * and directories.
 * XXX it might be saner to just direct map all of physical memory
 * into the kernel using 2MB pages.  We have enough space to do
 * it (2^47 bits of KVM, while current max physical addressability
 * is 2^40 physical bits).  Then we can get rid of the evil hole
 * in the page tables and the evil overlapping.
 */
 内核可以有空间2^48/2,即内核和userland各一半.
 目前内核仅仅用了2个表项,即1024G,
#ifdef _KERNEL
/*
 * Fixed virtual addresses of the page-table windows.  Each one is built
 * with KVADDR() by placing PML4PML4I in one more index argument than the
 * previous: one for the PT window, two for the PD window, three for the
 * PDP window and all four for the PML4 itself.
 * NOTE(review): KVADDR's argument order is assumed to be
 * (pml4, pdp, pd, pt) -- confirm against its definition.
 */
#define addr_PTmap (KVADDR(PML4PML4I, 0, 0, 0))
#define addr_PDmap (KVADDR(PML4PML4I, PML4PML4I, 0, 0))
#define addr_PDPmap (KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, 0))
#define addr_PML4map (KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I))
/* Address of entry number PML4PML4I inside the PML4 window. */
#define addr_PML4pml4e (addr_PML4map + (PML4PML4I * sizeof(pml4_entry_t)))
/* Typed pointer views of the addresses above. */
#define PTmap ((pt_entry_t *)(addr_PTmap))
#define PDmap ((pd_entry_t *)(addr_PDmap))
#define PDPmap ((pd_entry_t *)(addr_PDPmap))
#define PML4map ((pd_entry_t *)(addr_PML4map))
#define PML4pml4e ((pd_entry_t *)(addr_PML4pml4e))

extern u_int64_t KPML4phys; /* physical address of kernel level 4 */
#endif

在内核访问va的方法是通过PTmap
/*
 * Return a pointer to the page-table entry for va inside the PTmap window.
 *
 * The page number of va is reduced to the combined width of the four
 * page-table index fields and then used as an index into PTmap.
 */
PMAP_INLINE pt_entry_t *
vtopte(vm_offset_t va)
{
	u_int64_t pte_index = va >> PAGE_SHIFT;

	/* Keep only the PT + PD + PDP + PML4 index bits. */
	pte_index &= ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT +
	    NPML4EPGSHIFT)) - 1);

	return (&PTmap[pte_index]);
}

第五部分
待续(by chishanmingshen)。。。


阅读(4949) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~