2012年(8)
分类:
2012-03-17 21:02:06
Written by leeming
这一讲主要把setup_arch中尚未解释的那个函数(paging_init)讲解完毕,从而完成对setup_arch的分析,好让我们的start_kernel继续往下执行。
/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 *
 * In short: build the page tables and initialise memory.
 *
 * @mi:    memory description; this function reads nr_banks and each
 *         bank's start, size and node, and may rewrite a bank's node to -1.
 * @mdesc: machine descriptor; its map_io hook is called when non-NULL.
 *
 * NOTE(review): in this annotated listing each helper call
 * (build_mem_type_table, bootmem_init, devicemaps_init) is immediately
 * followed by a brace block that appears to be that helper's body pasted
 * inline for explanation. As written, the block runs in addition to the
 * call - confirm whether that is intended.
 */
void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
{
void *zero_page;
// Fills in the page-table attributes for each memory type (original note:
// e.g. normal memory is MEMORY, devices are DEVICE, the vector table is
// HIGH_VECTORS).
build_mem_type_table();
{
struct cachepolicy *cp;
// Read the control register value (original note: CP15 register c1).
// cr is not referenced again in the visible code.
unsigned int cr = get_cr();
unsigned int user_pgprot, kern_pgprot;
// CPU architecture version.
int cpu_arch = cpu_architecture();
int i;
// Clamp the cache policy according to the kernel configuration: a
// disabled D-cache caps it at BUFFERED, a write-through D-cache caps it
// at WRITETHROUGH.
#if defined(CONFIG_CPU_DCACHE_DISABLE)
if (cachepolicy > CPOLICY_BUFFERED)
cachepolicy = CPOLICY_BUFFERED;
#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
if (cachepolicy > CPOLICY_WRITETHROUGH)
cachepolicy = CPOLICY_WRITETHROUGH;
#endif
// Before ARMv5: demote WRITEALLOC (or higher) to WRITEBACK and disable
// the ECC mask.
if (cpu_arch < CPU_ARCH_ARMv5) {
if (cachepolicy >= CPOLICY_WRITEALLOC)
cachepolicy = CPOLICY_WRITEBACK;
ecc_mask = 0;// original note: pre-v5 first-level descriptors do not define bit 9 as a protection flag
}
if (cpu_arch <= CPU_ARCH_ARMv5TEJ) {
// mem_types is declared outside this listing (original note: a global
// array in arch/arm/mm-armv.c holding every memory type).
for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
// prot_l1 and prot_sect are both first-level descriptor values
// (per original note). Set PMD_BIT4 in each one that is non-zero.
if (mem_types[i].prot_l1)
mem_types[i].prot_l1 |= PMD_BIT4;
if (mem_types[i].prot_sect)
mem_types[i].prot_sect |= PMD_BIT4;
}
}
// Original note: on the author's system cachepolicy is 3, which selects
// this entry (unverified here):
// .policy = "writeback",
// .cr_mask = 0,
// .pmd = PMD_SECT_WB,
// .pte = PTE_BUFFERABLE|PTE_CACHEABLE,
cp = &cache_policies[cachepolicy];
// kern_pgprot and user_pgprot start out as the policy's pte bits
// (original note: second-level descriptor bits for kernel and user space).
kern_pgprot = user_pgprot = cp->pte;
// Original note: code for architectures newer than v4T was removed from
// this listing at this point.
for (i = 0; i < 16; i++) {
// Fetch each of the 16 protection_map entries in turn.
unsigned long v = pgprot_val(protection_map[i]);
// Replace the L_PTE_BUFFERABLE/L_PTE_CACHEABLE bits of the entry with
// user_pgprot and store it back. Original note: these are the "Linux"
// pte definitions; the kernel keeps both Linux and hardware
// definitions for compatibility, and here the two match.
v = (v & ~(L_PTE_BUFFERABLE|L_PTE_CACHEABLE)) | user_pgprot;
protection_map[i] = __pgprot(v);
}
// Apply the kernel pte bits to both vector-page memory types.
mem_types[MT_LOW_VECTORS].prot_pte |= kern_pgprot;
mem_types[MT_HIGH_VECTORS].prot_pte |= kern_pgprot;
mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1);
// Default kernel page protection, including the policy's pte bits.
pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
L_PTE_DIRTY | L_PTE_WRITE |
L_PTE_EXEC | kern_pgprot);
// Add the ECC mask and the policy's pmd bits to the relevant types.
mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
mem_types[MT_ROM].prot_sect |= cp->pmd;
// MT_CACHECLEAN gets WT for a write-through policy and WB for either
// write-back variant; any other pmd value leaves it untouched.
switch (cp->pmd) {
case PMD_SECT_WT:
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
break;
case PMD_SECT_WB:
case PMD_SECT_WBWA:
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
break;
}
// Everything above adjusts the page-table attributes stored in mem_types
// (first-level and second-level descriptor bits). Original note: access
// permissions (AP) are covered too, and the domain field keeps its
// boot-time value and needs no adjustment here.
// Original note (unverified): domains are kernel = 0, user = 1, io = 2
// (the note says there are four kinds but names only three).
printk("Memory policy: ECC %sabled, Data cache %s\n",
ecc_mask ? "en" : "dis", cp->policy);
}
bootmem_init(mi);
{
unsigned long addr, memend_pfn = 0;
int node, initrd_node, i;
/*
 * Invalidate the node number for empty or invalid memory banks
 */
for (i = 0; i < mi->nr_banks; i++)
if (mi->bank[i].size == 0 || mi->bank[i].node >= MAX_NUMNODES)
mi->bank[i].node = -1;
// Copy the caller's memory description into meminfo, which is declared
// outside this listing (original note: the data was filled in by the
// fixup function in 4020.c).
memcpy(&meminfo, mi, sizeof(meminfo));
// NOTE(review): the original note gave MODULE_START as 0xc0000000. The
// follow-on loop below continues from addr up to PAGE_OFFSET, so
// MODULE_START is presumably at or below PAGE_OFFSET - confirm the value.
for (addr = 0; addr < MODULE_START; addr += PGDIR_SIZE)
// Background from the original notes (it describes i386, not this ARM
// code, and is unverified here): before paging is fully set up the
// kernel runs on a temporary page table built in
// arch/i386/kernel/head.S, where swapper_pg_dir is the statically
// initialised temporary page global directory and pg0 is where the
// first page table starts.
// pmd_off_k() yields the page-table entry address for virtual address
// addr; pmd_clear() zeroes that entry (per original note).
pmd_clear(pmd_off_k(addr));
#ifdef CONFIG_XIP_KERNEL
/* The XIP kernel is mapped in the module area -- skip over it */
addr = ((unsigned long)&_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
#endif
// Continue clearing from addr (possibly advanced past the XIP kernel
// above) up to PAGE_OFFSET.
for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
pmd_clear(pmd_off_k(addr));
/*
 * Clear out all the kernel space mappings, except for the first
 * memory bank, up to the end of the vmalloc region.
 */
// Starts at the virtual address just past the end of bank 0 and stops at
// VMALLOC_END. Original note: on the author's board this is the range
// 0xc2000000-0xd0000000 (unverified).
for (addr = __phys_to_virt(mi->bank[0].start + mi->bank[0].size);
addr < VMALLOC_END; addr += PGDIR_SIZE)
pmd_clear(pmd_off_k(addr));
/*
 * Locate which node contains the ramdisk image, if any.
 */
// Returns the memory node holding the initrd, if there is one.
initrd_node = check_initrd(mi);
/*
 * Run through each node initialising the bootmem allocator.
 */
for_each_node(node) {
unsigned long end_pfn;
// Original note: this builds the first-level page tables for the node's
// memory (and second-level tables if needed) - not visible here.
end_pfn = bootmem_init_node(node, initrd_node, mi);
/*
 * Remember the highest memory PFN.
 */
if (end_pfn > memend_pfn)
memend_pfn = end_pfn;
}
// Virtual address corresponding to the highest PFN seen above.
high_memory = __va(memend_pfn << PAGE_SHIFT);
/*
 * This doesn't seem to be used by the Linux memory manager any
 * more, but is used by ll_rw_block. If we can get rid of it, we
 * also get rid of some of the stuff above as well.
 *
 * Note: max_low_pfn and max_pfn reflect the number of _pages_ in
 * the system, not the maximum PFN.
 */
max_pfn = max_low_pfn = memend_pfn - PHYS_PFN_OFFSET;
}
devicemaps_init(mdesc);
{
struct map_desc map;
unsigned long addr;
void *vectors;
/*
 * Allocate the vector page early.
 */
// Request one page for the exception vector table; BUG if the allocation
// returned NULL.
vectors = alloc_bootmem_low_pages(PAGE_SIZE);
BUG_ON(!vectors);
// Clear every entry from VMALLOC_END upward. The loop stops when addr
// becomes 0, i.e. presumably when the unsigned addition wraps past the
// top of the address space (assumes VMALLOC_END is PGDIR_SIZE-aligned -
// TODO confirm).
for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
// pmd_off_k() yields the page-table entry address for virtual address
// addr; pmd_clear() zeroes that entry (per original note).
pmd_clear(pmd_off_k(addr));
/*
 * Map the kernel if it is XIP.
 * It is always first in the modulearea.
 */
#ifdef CONFIG_XIP_KERNEL
map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & PGDIR_MASK);
map.virtual = MODULE_START;
map.length = ((unsigned long)&_etext - map.virtual + ~PGDIR_MASK) & PGDIR_MASK;
map.type = MT_ROM;
create_mapping(&map);
#endif
/*
 * Map the cache flushing regions.
 */
#ifdef FLUSH_BASE
map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
map.virtual = FLUSH_BASE;
map.length = PGDIR_SIZE;
map.type = MT_CACHECLEAN;
create_mapping(&map);
#endif
#ifdef FLUSH_BASE_MINICACHE
map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + PGDIR_SIZE);
map.virtual = FLUSH_BASE_MINICACHE;
map.length = PGDIR_SIZE;
map.type = MT_MINICLEAN;
create_mapping(&map);
#endif
/*
 * Create a mapping for the machine vectors at the high-vectors
 * location (0xffff0000). If we aren't using high-vectors, also
 * create a mapping at the low-vectors virtual address.
 */
// Map the vector page allocated above; the attributes for
// MT_HIGH_VECTORS / MT_LOW_VECTORS were prepared earlier in mem_types.
map.pfn = __phys_to_pfn(virt_to_phys(vectors));
map.virtual = 0xffff0000;
map.length = PAGE_SIZE;
map.type = MT_HIGH_VECTORS;
create_mapping(&map);
// High vectors not in use: map the same page at virtual address 0 too
// (pfn and length are reused from the mapping above).
if (!vectors_high()) {
map.virtual = 0;
map.type = MT_LOW_VECTORS;
create_mapping(&map);
}
/*
 * Ask the machine support to map in the statically mapped devices.
 */
// Original note: on the author's SEP4020 board this hook ends up calling
// iotable_init(sep4020_io_desc, ARRAY_SIZE(sep4020_io_desc)), which uses
// create_mapping() on each array entry (unverified here).
if (mdesc->map_io)
mdesc->map_io();
/*
 * Finally flush the caches and tlb to ensure that we're in a
 * consistent state wrt the writebuffer. This also ensures that
 * any write-allocated cache lines in the vector page are written
 * back. After this point, we can start to touch devices again.
 */
// The TLB must be flushed once the page tables have been built; see the
// comment above for the reason.
local_flush_tlb_all();
flush_cache_all();
}
// About pgd / pud / pmd / pte (per original note): each PGD entry points
// to a PUD, each PUD entry to a PMD, each PMD entry to a PTE, and each PTE
// entry to the physical base address of a page. On ARM the pud level is
// not used and the pmd level is folded as well.
// Record the entry that covers the 0xffff0000 page.
top_pmd = pmd_off_k(0xffff0000);
/*
 * allocate the zero page. Note that we count on this going ok.
 */
// Original note: empty_zero_page is a special page used for
// zero-initialised data and copy-on-write. The allocation result is not
// checked before being zeroed (see the comment above).
zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
memzero(zero_page, PAGE_SIZE);
empty_zero_page = virt_to_page(zero_page);
flush_dcache_page(empty_zero_page);
}