setup_arch之paging_init函数
对于arm处理器paging_init函数在arch/arm/mm/nommu.c与arch/arm/mm/mmu.c中分别有定义,在nommu是是不支持mmu存储管理的,它定义很简单如下:
void __init paging_init(struct machine_desc *mdesc)
{
bootmem_init();
}
关于nommu中的paging_init函数不是我们分析的重点,这里主要以mmu.c中定义为主,它详细代码如下:
void __init paging_init(struct machine_desc *mdesc)
{
void *zero_page;
sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL);
build_mem_type_table();
sanity_check_meminfo();
prepare_page_table();
map_lowmem();
devicemaps_init(mdesc);
kmap_init();
top_pmd = pmd_off_k(0xffff0000);
/* allocate the zero page. */
zero_page = early_alloc(PAGE_SIZE);
bootmem_init();
empty_zero_page = virt_to_page(zero_page);
__flush_dcache_page(NULL, empty_zero_page);
}
/*
* Adjust the PMD section entries according to the CPU in use.
*/
这个函数主要是用来建立各种类型的页表选项(比如内存是MEMORY类型,设备室DEVICE,中断向量表是HIGH_VECTORS,arch/arm/mm/mmu.c
struct cachepolicy {
const char policy[16];
unsigned int cr_mask;
unsigned int pmd;
unsigned int pte;
};
static struct cachepolicy cache_policies[] __initdata = {
{
.policy = "uncached",
.cr_mask = CR_W|CR_C,
.pmd = PMD_SECT_UNCACHED,
.pte = L_PTE_MT_UNCACHED,
}, {
.policy = "buffered",
.cr_mask = CR_C,
.pmd = PMD_SECT_BUFFERED,
.pte = L_PTE_MT_BUFFERABLE,
}, {
.policy = "writethrough",
.cr_mask = 0,
.pmd = PMD_SECT_WT,
.pte = L_PTE_MT_WRITETHROUGH,
}, {
.policy = "writeback",
.cr_mask = 0,
.pmd = PMD_SECT_WB,
.pte = L_PTE_MT_WRITEBACK,
}, {
.policy = "writealloc",
.cr_mask = 0,
.pmd = PMD_SECT_WBWA,
.pte = L_PTE_MT_WRITEALLOC,
}
};
static void __init build_mem_type_table(void)
{
struct cachepolicy *cp;
unsigned int cr = get_cr(); //获取cp15处理器的c1寄存器位
unsigned int user_pgprot, kern_pgprot, vecs_pgprot;
int cpu_arch = cpu_architecture(); //获取处理器架构版本
/*
arch/arm/include/asm/system.h
static inline unsigned int get_cr(void)
{
unsigned int val;
asm("mrc p15, 0, %0, c1, c0, 0 @ get CR" : "=r" (val) : : "cc");
return val;
}
*/
int i;
//根据处理器版本号调整cache政策,不是写缓冲区的政策
if (cpu_arch < CPU_ARCH_ARMv6) {
#if defined(CONFIG_CPU_DCACHE_DISABLE)
if (cachepolicy > CPOLICY_BUFFERED)
cachepolicy = CPOLICY_BUFFERED;
#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
if (cachepolicy > CPOLICY_WRITETHROUGH)
cachepolicy = CPOLICY_WRITETHROUGH;
#endif
}
if (cpu_arch < CPU_ARCH_ARMv5) {
if (cachepolicy >= CPOLICY_WRITEALLOC)
cachepolicy = CPOLICY_WRITEBACK;
ecc_mask = 0; //因为v5前的处理器的一级描述符没有定义第9位作为保护标志位
//我们的cachepolicy是3,因此相应的配置如下
// .policy = "writeback",
// .cr_mask = 0,
// .pmd = PMD_SECT_WB,
// .pte = PTE_BUFFERABLE|PTE_CACHEABLE,
}
/*
arch/arm/mm/mmu.c
#define CPOLICY_WRITEBACK 3
#define CPOLICY_WRITEALLOC 4
arm体系结构共定义了 6 个版本,版本号分别为 1 ~ 6 ,ARM9采用ARMV4T(Harvard)结构,五级流水处理以及分离的Cache结构,平均功耗为0.7mW/MHz。时钟速度为120MHz-200MHz,每条指令平均执行1.5个时钟周期*/
#ifdef CONFIG_SMP
cachepolicy = CPOLICY_WRITEALLOC;
#endif
/*
* Strip out features not present on earlier architectures.
* Pre-ARMv5 CPUs don't have TEX bits. Pre-ARMv6 CPUs or those
* without extended page tables don't have the 'Shared' bit.
* arch/arm/include/asm/system.h
* #define CPU_ARCH_UNKNOWN 0
* #define CPU_ARCH_ARMv3 1
* #define CPU_ARCH_ARMv4 2
* #define CPU_ARCH_ARMv5 4
*/
if (cpu_arch < CPU_ARCH_ARMv5)
for (i = 0; i < ARRAY_SIZE(mem_types); i++)
mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
/*mem_types是一个全局数组arch/arm/mm/mmu.c,里面有所有类型,见后
arch/arm/include/asm/pgtable-hwdef.h
#define PMD_SECT_TEX(x) ((x) << 12) /* v5 */
#define PMD_BIT4 (1 << 4)
*/
if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
for (i = 0; i < ARRAY_SIZE(mem_types); i++)
mem_types[i].prot_sect &= ~PMD_SECT_S;
/*
* ARMv5 and lower, bit 4 must be set for page tables (was: cache
* "update-able on write" bit on ARM610). However, Xscale and
* Xscale3 require this bit to be cleared.
*/
if (cpu_is_xscale() || cpu_is_xsc3()) {
for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
mem_types[i].prot_sect &= ~PMD_BIT4; //#define PMD_BIT4 (1 << 4)
mem_types[i].prot_l1 &= ~PMD_BIT4; //将一级描述符的第4位置1
}
} else if (cpu_arch < CPU_ARCH_ARMv6) {
for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
if (mem_types[i].prot_l1)
mem_types[i].prot_l1 |= PMD_BIT4;
if (mem_types[i].prot_sect)
mem_types[i].prot_sect |= PMD_BIT4;
}
}
/*
* Mark the device areas according to the CPU/architecture.
*/
if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
if (!cpu_is_xsc3()) {
/*
* Mark device regions on ARMv6+ as execute-never
* to prevent speculative instruction fetches.
*/
mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;
}
if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
/*
* For ARMv7 with TEX remapping,
* - shared device is SXCB=1100
* - nonshared device is SXCB=0100
* - write combine device mem is SXCB=0001
* (Uncached Normal memory)
*/
mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
} else if (cpu_is_xsc3()) {
/*
* For Xscale3,
* - shared device is TEXCB=00101
* - nonshared device is TEXCB=01000
* - write combine device mem is TEXCB=00100
* (Inner/Outer Uncacheable in xsc3 parlance)
*/
mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
} else {
/*
* For ARMv6 and ARMv7 without TEX remapping,
* - shared device is TEXCB=00001
* - nonshared device is TEXCB=01000
* - write combine device mem is TEXCB=00100
* (Uncached Normal in ARMv6 parlance).
*/
mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
}
} else {
/*
* On others, write combining is "Uncached/Buffered"
*/
mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
}
//PMD_SECT_BUFFERABLE (1 << 2) ; MT_DEVICE_WC 3
/*
* Now deal with the memory-type mappings
*/
cp = &cache_policies[cachepolicy];
vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
//kern_pgprot user_pgprot是内核和用户空间的二级页表描述符
#ifndef CONFIG_SMP
/*
* Only use write-through for non-SMP systems
*/
if (cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;
#endif
/*
* Enable CPU-specific coherency if supported.
* (Only available on XSC3 at the moment.)
*/
if (arch_is_coherent() && cpu_is_xsc3()) {
mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
}
/*
* ARMv6 and above have extended page tables.
*/
if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
/*
* Mark cache clean areas and XIP ROM read only
* from SVC mode and no access from userspace.
*/
mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
#ifdef CONFIG_SMP
/*
* Mark memory with the "shared" attribute for SMP systems
*/
user_pgprot |= L_PTE_SHARED;
kern_pgprot |= L_PTE_SHARED;
vecs_pgprot |= L_PTE_SHARED;
mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
#endif
}
/*
* Non-cacheable Normal - intended for memory areas that must
* not cause dirty cache line writebacks when used
*/
if (cpu_arch >= CPU_ARCH_ARMv6) {
if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
/* Non-cacheable Normal is XCB = 001 */
mem_types[MT_MEMORY_NONCACHED].prot_sect |=
PMD_SECT_BUFFERED;
} else {
/* For both ARMv6 and non-TEX-remapping ARMv7 */
mem_types[MT_MEMORY_NONCACHED].prot_sect |=
PMD_SECT_TEX(1);
}
} else {
mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
}
for (i = 0; i < 16; i++) {
unsigned long v = pgprot_val(protection_map[i]);//这里依次获取16个默认的保护类型的值
protection_map[i] = __pgprot(v | user_pgprot);//这里将值再加上我们的设置就是最新的16个值,将它写回更新
/*mm/mmap.c
pgprot_t protection_map[16] = {
__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};
arch/arm/include/asm/pgtable.h
#define __P000 __PAGE_NONE
#define __P001 __PAGE_READONLY
__PAGE_NONE等都在pgtable.h 定义
*/
}
/*
typedef unsigned long pgprot_t;
#define pgprot_val(x) (x)
#define __pgprot(x) (x)
*/
mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;
pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
L_PTE_DIRTY | L_PTE_WRITE | kern_pgprot);
mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
mem_types[MT_MEMORY].prot_pte |= kern_pgprot;
mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask;
mem_types[MT_ROM].prot_sect |= cp->pmd;
switch (cp->pmd) {
case PMD_SECT_WT:
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
break;
case PMD_SECT_WB:
case PMD_SECT_WBWA:
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
break;
}
//以上所有的操作都是为了给mem_types这个结构体中的各种类型中的页表参数添加上我们的要求,主要是一级页表,二级页表,ap(访问权限控制);至于domain是利用系统初始化时的值,不用我们再进行干预。系统的domain类型一共有四种,kernel——0;user——1;io—— 2
printk("Memory policy: ECC %sabled, Data cache %s\n",
ecc_mask ? "en" : "dis", cp->policy);
for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
struct mem_type *t = &mem_types[i];
if (t->prot_l1)
t->prot_l1 |= PMD_DOMAIN(t->domain);
if (t->prot_sect)
t->prot_sect |= PMD_DOMAIN(t->domain);
}
}
arch/arm/mm/mm.h
struct mem_type {
unsigned int prot_pte;
unsigned int prot_l1; //prot_l1 prot_sect都是一级描述符的意思
unsigned int prot_sect;
unsigned int domain;
};
mem_types全局数组定义,里面有所有类型。
arch/arm/mm/mmu.c
#define PROT_PTE_DEVICE L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_WRITE
#define PROT_SECT_DEVICE PMD_TYPE_SECT|PMD_SECT_AP_WRITE
static struct mem_type mem_types[] = {
[MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
L_PTE_SHARED,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PROT_SECT_DEVICE | PMD_SECT_S,
.domain = DOMAIN_IO,
},
[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PROT_SECT_DEVICE,
.domain = DOMAIN_IO,
},
[MT_DEVICE_CACHED] = { /* ioremap_cached */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB,
.domain = DOMAIN_IO,
},
[MT_DEVICE_WC] = { /* ioremap_wc */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PROT_SECT_DEVICE,
.domain = DOMAIN_IO,
},
[MT_UNCACHED] = {
.prot_pte = PROT_PTE_DEVICE,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
.domain = DOMAIN_IO,
},
[MT_CACHECLEAN] = {
.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
.domain = DOMAIN_KERNEL,
},
[MT_MINICLEAN] = {
.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
.domain = DOMAIN_KERNEL,
},
[MT_LOW_VECTORS] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_EXEC,
.prot_l1 = PMD_TYPE_TABLE,
.domain = DOMAIN_USER,
},
[MT_HIGH_VECTORS] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_USER | L_PTE_EXEC,
.prot_l1 = PMD_TYPE_TABLE,
.domain = DOMAIN_USER,
},
[MT_MEMORY] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_WRITE | L_PTE_EXEC,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
.domain = DOMAIN_KERNEL,
},
[MT_ROM] = {
.prot_sect = PMD_TYPE_SECT,
.domain = DOMAIN_KERNEL,
},
[MT_MEMORY_NONCACHED] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_WRITE | L_PTE_EXEC | L_PTE_MT_BUFFERABLE,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
.domain = DOMAIN_KERNEL,
},
[MT_MEMORY_DTCM] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG |
L_PTE_DIRTY | L_PTE_WRITE,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
.domain = DOMAIN_KERNEL,
},
[MT_MEMORY_ITCM] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_USER | L_PTE_EXEC,
.prot_l1 = PMD_TYPE_TABLE,
.domain = DOMAIN_IO,
},
};
const struct mem_type *get_mem_type(unsigned int type)
{
return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
}
EXPORT_SYMBOL(get_mem_type);
arch/arm/mm/mmu.c
720行 static void * __initdata vmalloc_min = (void *)(VMALLOC_END - SZ_128M);其中VMALLOC_END在arch/arm/mach-s3c2410/include/mach/vmalloc.h中宏定义为0xE0000000UL
phys_addr_t lowmem_end_addr; //phys_addr_t对于32位机就u32
static void __init sanity_check_meminfo(void)
{
int i, j, highmem = 0;
lowmem_end_addr = __pa(vmalloc_min - 1) + 1;//虚地址->物理地址转换
for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
struct membank *bank = &meminfo.bank[j];
*bank = meminfo.bank[i];
/*
arch/arm/include/asm/memory.h第34行
#define PAGE_OFFSET UL(CONFIG_PAGE_OFFSET)
include/generated/autoconf.h第34行255
#define CONFIG_PAGE_OFFSET 0xC0000000
*/
#ifdef CONFIG_HIGHMEM //__va物理地址->虚地址转换
if (__va(bank->start) > vmalloc_min ||
__va(bank->start) < (void *)PAGE_OFFSET)
highmem = 1;
bank->highmem = highmem;
/*
* Split those memory banks which are partially overlapping
* the vmalloc area greatly simplifying things later.
*/
if (__va(bank->start) < vmalloc_min &&
bank->size > vmalloc_min - __va(bank->start)) {
if (meminfo.nr_banks >= NR_BANKS) { //对2410来说NR_BANKS定义为8
printk(KERN_CRIT "NR_BANKS too low, "
"ignoring high memory\n");
} else {
memmove(bank + 1, bank,
(meminfo.nr_banks - i) * sizeof(*bank));
//将bank的sizeof(*bank)字节内存内容的拷贝bank+1
meminfo.nr_banks++;
i++;
bank[1].size -= vmalloc_min - __va(bank->start);
bank[1].start = __pa(vmalloc_min - 1) + 1;
bank[1].highmem = highmem = 1;
j++;
}
bank->size = vmalloc_min - __va(bank->start);
}
#else
bank->highmem = highmem;
/*
* Check whether this memory bank would entirely overlap
* the vmalloc area.
*/
if (__va(bank->start) >= vmalloc_min ||
__va(bank->start) < (void *)PAGE_OFFSET) {
printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx "
"(vmalloc region overlap).\n",
bank->start, bank->start + bank->size - 1);
continue;
}
/*
* Check whether this memory bank would partially overlap
* the vmalloc area.
*/
if (__va(bank->start + bank->size) > vmalloc_min ||
__va(bank->start + bank->size) < __va(bank->start)) {
unsigned long newsize = vmalloc_min - __va(bank->start);
printk(KERN_NOTICE "Truncating RAM at %.8lx-%.8lx "
"to -%.8lx (vmalloc region overlap).\n",
bank->start, bank->start + bank->size - 1,
bank->start + newsize - 1);
bank->size = newsize;
}
#endif
j++;
}
#ifdef CONFIG_HIGHMEM
if (highmem) {
const char *reason = NULL;
if (cache_is_vipt_aliasing()) {
/*
* Interactions between kmap and other mappings
* make highmem support with aliasing VIPT caches
* rather difficult.
* 在kmap和其它mappings间建立带aliasing VIPT caches高端内存相当困难
* #define cache_is_vipt_aliasing() cacheid_is(CACHEID_VIPT_ALIASING)
* #define CACHEID_VIPT_ALIASING (1 << 2)
*/
reason = "with VIPT aliasing cache";
#ifdef CONFIG_SMP //不作处理
} else if (tlb_ops_need_broadcast()) {
/*
* kmap_high needs to occasionally flush TLB entries,
* however, if the TLB entries need to be broadcast
* we may deadlock:
* kmap_high(irqs off)->flush_all_zero_pkmaps->
* flush_tlb_kernel_range->smp_call_function_many
* (must not be called with irqs off)
*/
reason = "without hardware TLB ops broadcasting";
#endif
}
if (reason) {
printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
reason);
while (j > 0 && meminfo.bank[j - 1].highmem)
j--;
}
}
#endif
meminfo.nr_banks = j;
}
linux刚启动时,系统创建了一个临时页表,那个是临时的,既然正式的要上场了,临时的当然要退休了。prepare_page_table
static inline void prepare_page_table(void)
{
unsigned long addr;
/*
* Clear out all the mappings below the kernel image.
*/
for (addr = 0; addr < MODULES_VADDR; addr += PGDIR_SIZE)
pmd_clear(pmd_off_k(addr));
#ifdef CONFIG_XIP_KERNEL
/* The XIP kernel is mapped in the module area -- skip over it */
addr = ((unsigned long)_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
#endif
for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
pmd_clear(pmd_off_k(addr));
/*
* Clear out all the kernel space mappings, except for the first
* memory bank, up to the end of the vmalloc region.
*/
for (addr = __phys_to_virt(bank_phys_end(&meminfo.bank[0]));
addr < VMALLOC_END; addr += PGDIR_SIZE)
pmd_clear(pmd_off_k(addr));
}
MODULES_VADDR =0xC0000000-16M,就是内核以下的映射。
VMALLOC_END=0xE0000000清除直到vmalloc区的所有的内核映射,除了第一个内存块。
其实这些都是次要的,主要看pmd_clear(pmd_off_k(addr))这个东东。
pmd_off_k查找一个虚拟地址的内核页表目录项
static inline pmd_t *pmd_off_k(unsigned long virt)
{
return pmd_off(pgd_offset_k(virt), virt);
}
static inline pmd_t *pmd_off(pgd_t *pgd, unsigned long virt)
{
return pmd_offset(pgd, virt);
}
#define pgd_index(addr) ((addr) >> PGDIR_SHIFT)
#define pgd_offset(mm, addr) ((mm)->pgd+pgd_index(addr))
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
#define pmd_offset(dir, addr) ((pmd_t *)(dir))
typedef unsigned long pmd_t;
#define PMD_SHIFT 21
#define PGDIR_SHIFT 21
在我们的开发板上init_mm->pgd的地址为0x50004000,这个是静态编译时就被定义了。曾经为了pgd_offset而困惑。为什么呢?我们以addr为0xC0000000为例,pgd_index(addr) =0x600,而init_mm->pgd=0x50004000,但是pgd_offset_k(addr)=0x50007000,这个是怎么得到的呢?0x50004000+0x600 =0x50004600啊,不应该是0x50007000。后来突然醍醐灌顶,顿悟了,为什么,因为这里面是指针相加,而pgd为unsigned long int pgd[2];每一次增加都是以8个字节增加,故pgd_offset_k(addr)为0x50007000,其中一个section的信息占有四个字节。到此先暂停一下,咱们跳到混沌初开,linux刚启动时创建临时页表的代码中。