Category: LINUX

2013-09-21 21:47:25

/*****************************************************************************************************************************************/
/* head-armv.S */
/*
 *  linux/arch/arm/kernel/head-armv.S
 *
 *  Copyright (C) 1994-1999 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  32-bit kernel startup code for all architectures
 */
#include <linux/config.h>
#include <linux/linkage.h>


#include <asm/assembler.h>
#include <asm/mach-types.h>
#include <asm/procinfo.h>


#define K(a,b,c) ((a) << 24 | (b) << 12 | (c))


/*
 * We place the page tables 16K below TEXTADDR.  Therefore, we must make sure
 * that TEXTADDR is correctly set.  Currently, we expect the least significant
 * "short" to be 0x8000, but we could probably relax this restriction to
 * TEXTADDR > PAGE_OFFSET + 0x4000
 *
 * Note that swapper_pg_dir is the virtual address of the page tables, and
 * pgtbl gives us a position-independent reference to these tables.  We can
 * do this because stext == TEXTADDR
 *
 * swapper_pg_dir, pgtbl and krnladr are all closely related.
 */
#if (TEXTADDR & 0xffff) != 0x8000
#error TEXTADDR must start at 0xXXXX8000
#endif
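To make the arithmetic concrete: with the common TEXTADDR = 0xC0008000, PAGE_OFFSET = 0xC0000000 and RAM at physical 0x30000000 (the assumed values used throughout the annotations below, not constants taken from this file), the layout works out as in this minimal C sketch:

#include <assert.h>

/* Illustrative values only (an S3C24xx-style board is assumed). */
#define PAGE_OFFSET 0xC0000000UL   /* virtual base of kernel space */
#define TEXTADDR    0xC0008000UL   /* virtual address of stext     */
#define PHYS_RAM    0x30000000UL   /* physical base of RAM         */

int main(void)
{
    unsigned long pg_dir_virt = TEXTADDR - 0x4000;   /* swapper_pg_dir */
    unsigned long pg_dir_phys = PHYS_RAM + (pg_dir_virt - PAGE_OFFSET);

    assert((TEXTADDR & 0xffff) == 0x8000);  /* the #error check above      */
    assert(pg_dir_virt == 0xC0004000UL);    /* 16KB below the kernel text  */
    assert(pg_dir_phys == 0x30004000UL);    /* what the pgtbl macro yields */
    return 0;
}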


.globl SYMBOL_NAME(swapper_pg_dir)
.equ SYMBOL_NAME(swapper_pg_dir), TEXTADDR - 0x4000  /* the page tables sit 16KB below the kernel text, e.g. at 0xc0004000 */


.macro pgtbl, reg, rambase
adr \reg, stext                       /* stext is the kernel entry point, e.g. physical 0x30008000 */
sub \reg, \reg, #0x4000
.endm


/*
 * Since the page table is closely related to the kernel start address, we
 * can convert the page table base address to the base address of the section
 * containing both.
 */
.macro krnladr, rd, pgtable, rambase
bic \rd, \pgtable, #0x000ff000
.endm


/*
 * Kernel startup entry point.
 * ---------------------------
 *
 * This is normally called from the decompressor code.  The requirements
 * are: MMU = off, D-cache = off, I-cache = don't care, r0 = 0,
 * r1 = machine nr.
 *
 * This code is mostly position independent, so if you link the kernel at
 * 0xc0008000, you call this at __pa(0xc0008000).
 *
 * See linux/arch/arm/tools/mach-types for the complete list of machine
 * numbers for r1.
 *
 * We're trying to keep crap to a minimum; DO NOT add any machine specific
 * crap here - that's what the boot loader (or in extreme, well justified
 * circumstances, zImage) is for.
 */
.section ".text.init",#alloc,#execinstr
.type stext, #function
ENTRY(stext)
mov r12, r0
/*
 * NOTE!  Any code which is placed here should be done for one of
 * the following reasons:
 *
 *  1. Compatibility with old production boot firmware (ie, users
 *     actually have and are booting the kernel with the old firmware)
 *     and therefore will be eventually removed.
 *  2. Cover the case when there is no boot firmware.  This is not
 *     ideal, but in this case, it should ONLY set r0 and r1 to the
 *     appropriate value.
 */
#if defined(CONFIG_ARCH_NETWINDER)
/*
 * Compatibility cruft for old NetWinder NeTTroms.  This
 * code is currently scheduled for destruction in 2.5.xx
 */
.rept 8
mov r0, r0
.endr


adr r2, 1f
ldmdb r2, {r7, r8}
and r3, r2, #0xc000
teq r3, #0x8000
beq __entry
bic r3, r2, #0xc000
orr r3, r3, #0x8000
mov r0, r3
mov r4, #64
sub r5, r8, r7
b 1f


.word _stext
.word __bss_start


1:
.rept 4
ldmia r2!, {r6, r7, r8, r9}
stmia r3!, {r6, r7, r8, r9}
.endr
subs r4, r4, #64
bcs 1b
movs r4, r5
mov r5, #0
movne pc, r0


mov r1, #MACH_TYPE_NETWINDER @ (will go in 2.5)
mov r12, #2 << 24 @ scheduled for removal in 2.5.xx
orr r12, r12, #5 << 12
__entry:
#endif
#if defined(CONFIG_ARCH_L7200)
/*
 * FIXME - No bootloader, so manually set 'r1' with our architecture number.
 */
mov r1, #MACH_TYPE_L7200
#endif


mov r0, #F_BIT | I_BIT | MODE_SVC @ make sure svc mode
msr cpsr_c, r0 @ and all irqs disabled
bl __lookup_processor_type
teq r10, #0 @ invalid processor?
moveq r0, #'p' @ yes, error 'p'
beq __error
bl __lookup_architecture_type
teq r7, #0 @ invalid architecture?
moveq r0, #'a' @ yes, error 'a'
beq __error
bl __create_page_tables   /* build the initial page tables */
adr lr, __ret @ return address
add pc, r10, #12 @ initialise processor
@ (return control reg)


.type __switch_data, %object
__switch_data: .long __mmap_switched
.long SYMBOL_NAME(__bss_start)
.long SYMBOL_NAME(_end)
.long SYMBOL_NAME(processor_id)
.long SYMBOL_NAME(__machine_arch_type)
.long SYMBOL_NAME(cr_alignment)
.long SYMBOL_NAME(init_task_union)+8192


/*
 * Enable the MMU.  This completely changes the structure of the visible
 * memory space.  You will not be able to trace execution through this.
 * If you have an enquiry about this, *please* check the linux-arm-kernel
 * mailing list archives BEFORE sending another post to the list.
 */
.type __ret, %function
__ret: ldr lr, __switch_data
mcr p15, 0, r0, c1, c0           /* write the control register: this enables the MMU */
mrc p15, 0, r0, c1, c0, 0 @ read it back.
mov r0, r0
mov r0, r0
mov pc, lr


/*
 * The following fragment of code is executed with the MMU on, and uses
 * absolute addresses; this is not position independent.
 *
 *  r0  = processor control register
 *  r1  = machine ID
 *  r9  = processor ID
 */
.align 5
__mmap_switched:
adr r3, __switch_data + 4
ldmia r3, {r4, r5, r6, r7, r8, sp} @ r2 = compat
@ sp = stack pointer


mov fp, #0 @ Clear BSS (and zero fp)
1: cmp r4, r5
strcc fp, [r4],#4
bcc 1b


str r9, [r6] @ Save processor ID
str r1, [r7] @ Save machine type
#ifdef CONFIG_ALIGNMENT_TRAP
orr r0, r0, #2 @ ...........A.
#endif
bic r2, r0, #2 @ Clear 'A' bit
stmia r8, {r0, r2} @ Save control register values
b SYMBOL_NAME(start_kernel)






/*
 * Setup the initial page tables.  We only setup the barest
 * amount which are required to get the kernel running, which
 * generally means mapping in the kernel code.
 *
 * We only map in 4MB of RAM, which should be sufficient in
 * all cases.
 *
 * r5 = physical address of start of RAM
 * r6 = physical IO address
 * r7 = byte offset into page tables for IO
 * r8 = page table flags
 */
 /* Map 4MB of physical memory.
      r5 = physical start of RAM, e.g. 0x30000000
      r6 = physical IO address
      r7 = byte offset of the IO space within the page tables
      r8 = page-table access permissions and attributes
   */
__create_page_tables:  /* build the initial page tables */
/*
 * Recall the macro:
 *     .macro pgtbl, reg, rambase
 *     adr \reg, stext          @ stext is the kernel entry, e.g. 0x30008000
 *     sub \reg, \reg, #0x4000
 *     .endm
 */
pgtbl r4, r5 @ page table address   /* r4 = 0x30004000 = physical base of swapper_pg_dir[] */
/*
 * So "pgtbl r4, r5" expands to:
 *     adr r4, stext            @ r4 = 0x30008000
 *     sub r4, r4, #0x4000      @ r4 = 0x30004000 = base of swapper_pg_dir[]
 */
/*
* Clear the 16K level 1 swapper page table
*/
mov r0, r4  /* r0 = r4 = 0x30004000 = swapper_pg_dir[] */
mov r3, #0  /* r3 = 0 */
add r2, r0, #0x4000  /* r2 = 0x30008000 */
/* zero the 16K from 0x30004000 to 0x30008000, i.e. clear swapper_pg_dir[] */
1: str r3, [r0], #4
str r3, [r0], #4
str r3, [r0], #4
str r3, [r0], #4
teq r0, r2
bne 1b


/*
* Create identity mapping for first MB of kernel to
* cater for the MMU enable.  This identity mapping
* will be removed by paging_init()
*/
/*
 * Recall the macro:
 *     .macro krnladr, rd, pgtable, rambase
 *     bic \rd, \pgtable, #0x000ff000
 *     .endm
 */
krnladr r2, r4, r5 @ start of kernel
/*
 * "krnladr r2, r4, r5" expands to:
 *     bic r2, r4, #0x000ff000   @ clear bits [19:12] of r4 = 0x30004000 ==> r2 = 0x30000000
 *
 * The 1MB of RAM starting at physical 0x30000000 is mapped here at
 * virtual 0x30000000 as well, so for this megabyte a virtual access and
 * a physical access land on the same memory.
 */
add r3, r8, r2 @ flags + kernel base    /* r3 = 0x30000000 | flags */
str r3, [r4, r2, lsr #18] @ identity mapping  /* word at 0x30004000 + (0x30000000 >> 18) becomes 0x30000000 | flags, i.e. a mapping whose virtual and physical addresses are equal */
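Why "lsr #18"? Each level-1 descriptor covers 1MB (address bits [31:20]) and is 4 bytes wide, so an address's byte offset into the table is (addr >> 20) * 4 = addr >> 18. A minimal C sketch of the store above, with the same assumed addresses (the flags value is purely illustrative):

/* Sketch of "str r3, [r4, r2, lsr #18]" in C; all values assumed. */
static void identity_map_first_mb(void)
{
    unsigned long *pgdir = (unsigned long *)0x30004000UL; /* r4: table base   */
    unsigned long krnl   = 0x30000000UL;                  /* r2: section base */
    unsigned long flags  = 0xC1EUL;                       /* r8: illustrative */

    /* entry index = krnl >> 20; byte offset = index * 4 = krnl >> 18 */
    pgdir[krnl >> 20] = krnl | flags;   /* identity-map one 1MB section */
}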


/*
* Now setup the pagetables for our kernel direct
* mapped region.  We round TEXTADDR down to the
* nearest megabyte boundary.
*/
/* Map the 1MB at virtual 0xc0000000 onto the 1MB at physical 0x30000000 */
add r0, r4, #(TEXTADDR & 0xff000000) >> 18 @ start of kernel  /* r0 = 0x30004000 + (0xc0000000 >> 18) = 0x30007000 */
bic r2, r3, #0x00f00000                    /* r2 = 0x30000000 | flags */
str r2, [r0] @ PAGE_OFFSET + 0MB           /* word at 0x30007000 = 0x30000000 | flags: maps 0xc0000000 to 0x30000000 */


/* Write the descriptor for TEXTADDR's own megabyte into the entry computed above */
add r0, r0, #(TEXTADDR & 0x00f00000) >> 18  /* (0xc0008000 & 0x00f00000) >> 18 = 0, so r0 is still 0x30007000 */
str r3, [r0], #4 @ KERNEL + 0MB             /* word at 0x30007000 = 0x30000000 | flags; r0 advances to 0x30007004 */
/* Map 0xc0000000 + 1MB onto 0x30000000 + 1MB */
add r3, r3, #1 << 20                        /* r3 = (0x30000000 + 1MB) | flags */
str r3, [r0], #4 @ KERNEL + 1MB             /* word at 0x30007004; r0 -> 0x30007008 */
/* Map 0xc0000000 + 2MB onto 0x30000000 + 2MB */
add r3, r3, #1 << 20                        /* r3 = (0x30000000 + 2MB) | flags */
str r3, [r0], #4 @ KERNEL + 2MB             /* word at 0x30007008; r0 -> 0x3000700c */
/* Map 0xc0000000 + 3MB onto 0x30000000 + 3MB */
add r3, r3, #1 << 20                        /* r3 = (0x30000000 + 3MB) | flags */
str r3, [r0], #4 @ KERNEL + 3MB             /* word at 0x3000700c; r0 -> 0x30007010 */
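The four stores above are equivalent to the following loop, again a sketch using the assumed addresses (pgdir, phys_ram and flags as in the previous snippet):

/* Sketch: direct-map 4MB of kernel space, one 1MB section per entry. */
static void map_kernel_4mb(unsigned long *pgdir,
                           unsigned long phys_ram, unsigned long flags)
{
    unsigned long virt = 0xC0000000UL;   /* TEXTADDR rounded down to 1MB */
    int i;

    for (i = 0; i < 4; i++)              /* KERNEL + 0MB .. KERNEL + 3MB */
        pgdir[(virt >> 20) + i] = (phys_ram + ((unsigned long)i << 20)) | flags;
}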


/*
* Ensure that the first section of RAM is present.
* we assume that:
*  1. the RAM is aligned to a 32MB boundary
*  2. the kernel is executing in the same 32MB chunk
*     as the start of RAM.
*/
/* Ensure the first section of RAM (rounded down to its 32MB chunk) is mapped at PAGE_OFFSET */
bic r0, r0, #0x01f00000 >> 18 @ round down   /* r0 back to 0x30007000, the entry for 0xc0000000 */
and r2, r5, #0xfe000000 @ round down         /* r2 = 0x30000000 & 0xfe000000 = 0x30000000 */
add r3, r8, r2 @ flags + rambase             /* r3 = 0x30000000 | flags */
str r3, [r0]                                 /* word at 0x30007000 = 0x30000000 | flags */


bic r8, r8, #0x0c @ turn off cacheable                           
@ and bufferable bits
#ifdef CONFIG_DEBUG_LL    /* low-level debug support */
/*
* Map in IO space for serial debugging.
* This allows debug messages to be output
* via a serial console before paging_init.
*/
add r0, r4, r7
rsb r3, r7, #0x4000 @ PTRS_PER_PGD*sizeof(long)
cmp r3, #0x0800
addge r2, r0, #0x0800
addlt r2, r0, r3
orr r3, r6, r8
1: str r3, [r0], #4
add r3, r3, #1 << 20
teq r0, r2
bne 1b
#if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS)
/*
* If we're using the NetWinder, we need to map in
* the 16550-type serial port for the debug messages
*/
teq r1, #MACH_TYPE_NETWINDER
teqne r1, #MACH_TYPE_CATS
bne 1f
add r0, r4, #0x3fc0
mov r3, #0x7c000000
orr r3, r3, r8
str r3, [r0], #4
add r3, r3, #1 << 20
str r3, [r0], #4
1:
#endif
#endif
#ifdef CONFIG_ARCH_RPC
/*
* Map in screen at 0x02000000 & SCREEN2_BASE
* Similar reasons here - for debug.  This is
* only for Acorn RiscPC architectures.
*/
add r0, r4, #0x80 @ 02000000
mov r3, #0x02000000
orr r3, r3, r8
str r3, [r0]
add r0, r4, #0x3600 @ d8000000
str r3, [r0]
#endif
mov pc, lr






/*
 * Exception handling.  Something went wrong and we can't
 * proceed.  We ought to tell the user, but since we
 * don't have any guarantee that we're even running on
 * the right architecture, we do virtually nothing.
 * r0 = ascii error character:
 * a = invalid architecture
 * p = invalid processor
 * i = invalid calling convention
 *
 * Generally, only serious errors cause this.
 */
__error:
#ifdef CONFIG_DEBUG_LL
mov r8, r0 @ preserve r0
adr r0, err_str
bl printascii
mov r0, r8
bl printch
#endif
#ifdef CONFIG_ARCH_RPC
/*
 * Turn the screen red on a error - RiscPC only.
 */
mov r0, #0x02000000
mov r3, #0x11
orr r3, r3, r3, lsl #8
orr r3, r3, r3, lsl #16
str r3, [r0], #4
str r3, [r0], #4
str r3, [r0], #4
str r3, [r0], #4
#endif
1: mov r0, r0
b 1b


#ifdef CONFIG_DEBUG_LL
err_str: .asciz "\nError: "
.align
#endif


/*
 * Read processor ID register (CP#15, CR0), and look up in the linker-built
 * supported processor list.  Note that we can't use the absolute addresses
 * for the __proc_info lists since we aren't running with the MMU on
 * (and therefore, we are not in the correct address space).  We have to
 * calculate the offset.
 *
 * Returns:
 * r5, r6, r7 corrupted
 * r8  = page table flags
 * r9  = processor ID
 * r10 = pointer to processor structure
 */
__lookup_processor_type:
adr r5, 2f
ldmia r5, {r7, r9, r10}
sub r5, r5, r10 @ convert addresses
add r7, r7, r5 @ to our address space
add r10, r9, r5
mrc p15, 0, r9, c0, c0 @ get processor id
1: ldmia r10, {r5, r6, r8} @ value, mask, mmuflags
and r6, r6, r9 @ mask wanted bits
teq r5, r6
moveq pc, lr
add r10, r10, #36 @ sizeof(proc_info_list)
cmp r10, r7
blt 1b
mov r10, #0 @ unknown processor
mov pc, lr


/*
 * Look in include/asm-arm/procinfo.h and arch/arm/kernel/arch.[ch] for
 * more information about the __proc_info and __arch_info structures.
 */
2: .long __proc_info_end
.long __proc_info_begin
.long 2b
.long __arch_info_begin
.long __arch_info_end
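In C, the processor lookup amounts to the loop below. This is a sketch: the struct is trimmed to the three fields the assembly actually reads, and the field names are assumptions based on include/asm-arm/procinfo.h rather than a verbatim copy. In the real kernel sizeof(struct proc_info_list) is 36 bytes, matching the "add r10, r10, #36" above.

/* Sketch of __lookup_processor_type; only the fields read here are kept. */
struct proc_info_list {
    unsigned long cpu_val;       /* expected CP15 c0 ID value         */
    unsigned long cpu_mask;      /* which ID bits must match          */
    unsigned long mmu_flags;     /* r8: section flags for the tables  */
    /* ...remaining fields omitted... */
};

static struct proc_info_list *
lookup_processor(struct proc_info_list *begin,   /* __proc_info_begin */
                 struct proc_info_list *end,     /* __proc_info_end   */
                 unsigned long cpu_id)           /* mrc p15, 0, ..., c0, c0 */
{
    struct proc_info_list *p;

    for (p = begin; p < end; p++)
        if ((cpu_id & p->cpu_mask) == p->cpu_val)
            return p;
    return 0;   /* unknown processor: the assembly returns r10 = 0 */
}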


/*
 * Lookup machine architecture in the linker-built list of architectures.
 * Note that we can't use the absolute addresses for the __arch_info
 * lists since we aren't running with the MMU on (and therefore, we are
 * not in the correct address space).  We have to calculate the offset.
 *
 *  r1 = machine architecture number
 * Returns:
 *  r2, r3, r4 corrupted
 *  r5 = physical start address of RAM
 *  r6 = physical address of IO
 *  r7 = byte offset into page tables for IO
 */
__lookup_architecture_type:
adr r4, 2b
ldmia r4, {r2, r3, r5, r6, r7} @ throw away r2, r3
sub r5, r4, r5 @ convert addresses
add r4, r6, r5 @ to our address space
add r7, r7, r5
1: ldr r5, [r4] @ get machine type
teq r5, r1
beq 2f
add r4, r4, #SIZEOF_MACHINE_DESC
cmp r4, r7
blt 1b
mov r7, #0 @ unknown architecture
mov pc, lr
2: ldmib r4, {r5, r6, r7} @ found, get results
mov pc, lr
/*****************************************************************************************************************************************/
/* mm-armv.c */
/*
 *  linux/arch/arm/mm/mm-armv.c
 *
 *  Copyright (C) 1998-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  Page table sludge for ARM v3 and v4 processor architectures.
 */
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/bootmem.h>


#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/io.h>
#include <asm/setup.h>
#include <asm/hardware.h>


#include <asm/mach/map.h>


/*
 * These are useful for identifying cache coherency
 * problems by allowing the cache or the cache and
 * writebuffer to be turned off.  (Note: the write
 * buffer should not be on and the cache off).
 */
static int __init nocache_setup(char *__unused)
{
cr_alignment &= ~4;
cr_no_alignment &= ~4;
flush_cache_all();
set_cr(cr_alignment);
return 1;
}


static int __init nowrite_setup(char *__unused)
{
cr_alignment &= ~(8|4);
cr_no_alignment &= ~(8|4);
flush_cache_all();
set_cr(cr_alignment);
return 1;
}


static int __init noalign_setup(char *__unused)
{
cr_alignment &= ~2;
cr_no_alignment &= ~2;
set_cr(cr_alignment);
return 1;
}


__setup("noalign", noalign_setup);
__setup("nocache", nocache_setup);
__setup("nowb", nowrite_setup);


#define FIRST_KERNEL_PGD_NR (FIRST_USER_PGD_NR + USER_PTRS_PER_PGD)


#define clean_cache_area(start,size) \
cpu_cache_clean_invalidate_range((unsigned long)start, ((unsigned long)start) + size, 0);




/*
 * need to get a 16k page for level 1
 */
 /* Allocate a page global directory.  If the PGT cache has no spare
    pages, this goes through the physical page allocator. */
pgd_t *get_pgd_slow(struct mm_struct *mm)
{
pgd_t *new_pgd, *init_pgd;
pmd_t *new_pmd, *init_pmd;
pte_t *new_pte, *init_pte;


new_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, 2);
if (!new_pgd)
goto no_pgd;


memzero(new_pgd, FIRST_KERNEL_PGD_NR * sizeof(pgd_t));


init_pgd = pgd_offset_k(0);


if (vectors_base() == 0) {
init_pmd = pmd_offset(init_pgd, 0);
init_pte = pte_offset(init_pmd, 0);


/*
* This lock is here just to satisfy pmd_alloc and pte_lock
*/
spin_lock(&mm->page_table_lock);


/*
* On ARM, first page must always be allocated since it
* contains the machine vectors.
*/
new_pmd = pmd_alloc(mm, new_pgd, 0);
if (!new_pmd)
goto no_pmd;


new_pte = pte_alloc(mm, new_pmd, 0);
if (!new_pte)
goto no_pte;


set_pte(new_pte, *init_pte);


spin_unlock(&mm->page_table_lock);
}


/*
* Copy over the kernel and IO PGD entries
*/
memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR,
      (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t));


/*
* FIXME: this should not be necessary
*/
clean_cache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));


return new_pgd;


no_pte:
spin_unlock(&mm->page_table_lock);
pmd_free(new_pmd);
free_pages((unsigned long)new_pgd, 2);
return NULL;


no_pmd:
spin_unlock(&mm->page_table_lock);
free_pages((unsigned long)new_pgd, 2);
return NULL;


no_pgd:
return NULL;
}
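A quick size check, assuming 4KB pages and the 2.4 ARM layout: the hardware level-1 table has 4096 four-byte entries, i.e. 16KB, which is exactly why the allocation above asks for an order-2 block (2^2 = 4 pages):

/* Sketch: why get_pgd_slow() uses __get_free_pages(GFP_KERNEL, 2)
 * (assumed 2.4 ARM values: 4096 level-1 entries of 4 bytes, 4KB pages). */
enum {
    PGD_ENTRIES = 4096,              /* one entry per 1MB of 4GB        */
    PGD_BYTES   = PGD_ENTRIES * 4,   /* 16384 = 16KB                    */
    PGD_ORDER   = 2,                 /* 4KB << 2 = 16KB, i.e. 4 pages   */
};
/* PGD_BYTES == (4096 << PGD_ORDER), so an order-2 allocation fits exactly. */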


/* Free a page global directory */
void free_pgd_slow(pgd_t *pgd)
{
pmd_t *pmd;
pte_t *pte;


if (!pgd)
return;


/* pgd is always present and good */
pmd = (pmd_t *)pgd;
if (pmd_none(*pmd))
goto free;
if (pmd_bad(*pmd)) {
pmd_ERROR(*pmd);
pmd_clear(pmd);
goto free;
}


pte = pte_offset(pmd, 0);
pmd_clear(pmd);
pte_free(pte);
pmd_free(pmd);
free:
free_pages((unsigned long) pgd, 2);
}


/*
 * Create a SECTION PGD between VIRT and PHYS in domain
 * DOMAIN with protection PROT
 */
static inline void
alloc_init_section(unsigned long virt, unsigned long phys, int prot)
{
pmd_t pmd;


pmd_val(pmd) = phys | prot;


set_pmd(pmd_offset(pgd_offset_k(virt), virt), pmd);
}


/*
 * Add a PAGE mapping between VIRT and PHYS in domain
 * DOMAIN with protection PROT.  Note that due to the
 * way we map the PTEs, we must allocate two PTE_SIZE'd
 * blocks - one for the Linux pte table, and one for
 * the hardware pte table.
 */
static inline void
alloc_init_page(unsigned long virt, unsigned long phys, int domain, int prot)
{
pmd_t *pmdp;
pte_t *ptep;


pmdp = pmd_offset(pgd_offset_k(virt), virt);


if (pmd_none(*pmdp)) {
pte_t *ptep = alloc_bootmem_low_pages(2 * PTRS_PER_PTE *
     sizeof(pte_t));


ptep += PTRS_PER_PTE;


set_pmd(pmdp, __mk_pmd(ptep, PMD_TYPE_TABLE | PMD_DOMAIN(domain)));
}
ptep = pte_offset(pmdp, virt);


set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, __pgprot(prot)));
}
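The "two PTE_SIZE'd blocks" comment describes this layout: the allocation holds the Linux-view PTE table (which carries software bits such as young/dirty that the hardware format lacks) in its first half, and the table the MMU actually walks in its second half; "ptep += PTRS_PER_PTE" skips to that second half before it is installed in the pmd. A sketch of the layout (a description, not kernel code):

/*
 * One 2 * PTRS_PER_PTE * sizeof(pte_t) allocation:
 *
 *   ptep ────────────────► [ Linux PTE table    ]  software view
 *   ptep + PTRS_PER_PTE ─► [ hardware PTE table ]  what the MMU walks
 *
 * set_pmd() points the pmd at the hardware half.
 */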


/*
 * Clear any PGD mapping.  On a two-level page table system,
 * the clearance is done by the middle-level functions (pmd)
 * rather than the top-level (pgd) functions.
 */
static inline void clear_mapping(unsigned long virt)
{
pmd_clear(pmd_offset(pgd_offset_k(virt), virt));
}


/*
 * Create the page directory entries and any necessary
 * page tables for the mapping specified by `md'.  We
 * are able to cope here with varying sizes and address
 * offsets, and we take full advantage of sections.
 */
static void __init create_mapping(struct map_desc *md)
{
unsigned long virt, length;
int prot_sect, prot_pte;
long off;


if (md->prot_read && md->prot_write &&
   !md->cacheable && !md->bufferable) {
printk(KERN_WARNING "Security risk: creating user "
      "accessible mapping for 0x%08lx at 0x%08lx\n",
      md->physical, md->virtual);
}


if (md->virtual != vectors_base() && md->virtual < PAGE_OFFSET) {
printk(KERN_WARNING "MM: not creating mapping for "
      "0x%08lx at 0x%08lx in user region\n",
      md->physical, md->virtual);
}


prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
  (md->prot_read  ? L_PTE_USER       : 0) |
  (md->prot_write ? L_PTE_WRITE      : 0) |
  (md->cacheable  ? L_PTE_CACHEABLE  : 0) |
  (md->bufferable ? L_PTE_BUFFERABLE : 0);


prot_sect = PMD_TYPE_SECT | PMD_DOMAIN(md->domain) |
   (md->prot_read  ? PMD_SECT_AP_READ    : 0) |
   (md->prot_write ? PMD_SECT_AP_WRITE   : 0) |
   (md->cacheable  ? PMD_SECT_CACHEABLE  : 0) |
   (md->bufferable ? PMD_SECT_BUFFERABLE : 0);


virt   = md->virtual;
off    = md->physical - virt;
length = md->length;


while ((virt & 0xfffff || (virt + off) & 0xfffff) && length >= PAGE_SIZE) {
alloc_init_page(virt, virt + off, md->domain, prot_pte);


virt   += PAGE_SIZE;
length -= PAGE_SIZE;
}


while (length >= PGDIR_SIZE) {
alloc_init_section(virt, virt + off, prot_sect);


virt   += PGDIR_SIZE;
length -= PGDIR_SIZE;
}


while (length >= PAGE_SIZE) {
alloc_init_page(virt, virt + off, md->domain, prot_pte);


virt   += PAGE_SIZE;
length -= PAGE_SIZE;
}
}
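A worked example may help; the numbers are invented. Mapping physical 0x30180000 at virtual 0xD0080000 for 0x280000 bytes emits 4KB pages up to the first 1MB boundary, then whole 1MB sections, then trailing pages. The standalone sketch below mimics the three loops and prints the breakdown:

#include <stdio.h>

/* Standalone sketch of create_mapping()'s page/section/page split.
 * PGDIR_SIZE is 1MB on 2.4 ARM; all addresses here are invented. */
#define PAGE_SZ 0x1000UL
#define SECT_SZ 0x100000UL

static void sketch_mapping(unsigned long virt, unsigned long phys,
                           unsigned long length)
{
    long off = phys - virt;

    /* leading 4KB pages until both virt and phys reach a 1MB boundary */
    while (((virt | (virt + off)) & (SECT_SZ - 1)) && length >= PAGE_SZ) {
        printf("page    %#lx -> %#lx\n", virt, virt + off);
        virt += PAGE_SZ; length -= PAGE_SZ;
    }
    /* whole 1MB sections */
    while (length >= SECT_SZ) {
        printf("section %#lx -> %#lx\n", virt, virt + off);
        virt += SECT_SZ; length -= SECT_SZ;
    }
    /* trailing 4KB pages */
    while (length >= PAGE_SZ) {
        printf("page    %#lx -> %#lx\n", virt, virt + off);
        virt += PAGE_SZ; length -= PAGE_SZ;
    }
}

int main(void)
{
    sketch_mapping(0xD0080000UL, 0x30180000UL, 0x280000UL);
    return 0;
}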


/*
 * In order to soft-boot, we need to insert a 1:1 mapping in place of
 * the user-mode pages.  This will then ensure that we have predictable
 * results when turning the mmu off
 */
void setup_mm_for_reboot(char mode)
{
pgd_t *pgd;
pmd_t pmd;
int i;


if (current->mm && current->mm->pgd)
pgd = current->mm->pgd;
else
pgd = init_mm.pgd;


for (i = 0; i < FIRST_USER_PGD_NR + USER_PTRS_PER_PGD; i++) {
pmd_val(pmd) = (i << PGDIR_SHIFT) |
PMD_SECT_AP_WRITE | PMD_SECT_AP_READ |
PMD_TYPE_SECT;
set_pmd(pmd_offset(pgd + i, i << PGDIR_SHIFT), pmd);
}
}


/*
 * Setup initial mappings.  We use the page we allocated for zero page to hold
 * the mappings, which will get overwritten by the vectors in traps_init().
 * The mappings must be in virtual address order.
 */
  /* Page-table initialisation: builds all the page tables needed for the
     physical memory in ZONE_DMA and ZONE_NORMAL */
void __init memtable_init(struct meminfo *mi)
{
struct map_desc *init_maps, *p, *q;
unsigned long address = 0;
int i;


init_maps = p = alloc_bootmem_low_pages(PAGE_SIZE);


for (i = 0; i < mi->nr_banks; i++) {
if (mi->bank[i].size == 0)
continue;


p->physical   = mi->bank[i].start;
p->virtual    = __phys_to_virt(p->physical);
p->length     = mi->bank[i].size;
p->domain     = DOMAIN_KERNEL;
p->prot_read  = 0;
p->prot_write = 1;
p->cacheable  = 1;
p->bufferable = 1;


p ++;
}


#ifdef FLUSH_BASE
p->physical   = FLUSH_BASE_PHYS;
p->virtual    = FLUSH_BASE;
p->length     = PGDIR_SIZE;
p->domain     = DOMAIN_KERNEL;
p->prot_read  = 1;
p->prot_write = 0;
p->cacheable  = 1;
p->bufferable = 1;


p ++;
#endif


#ifdef FLUSH_BASE_MINICACHE
p->physical   = FLUSH_BASE_PHYS + PGDIR_SIZE;
p->virtual    = FLUSH_BASE_MINICACHE;
p->length     = PGDIR_SIZE;
p->domain     = DOMAIN_KERNEL;
p->prot_read  = 1;
p->prot_write = 0;
p->cacheable  = 1;
p->bufferable = 0;


p ++;
#endif


/*
* Go through the initial mappings, but clear out any
* pgdir entries that are not in the description.
*/
q = init_maps;
do {
if (address < q->virtual || q == p) {
clear_mapping(address);
address += PGDIR_SIZE;
} else {
create_mapping(q);


address = q->virtual + q->length;
address = (address + PGDIR_SIZE - 1) & PGDIR_MASK;


q ++;
}
} while (address != 0);


/*
* Create a mapping for the machine vectors at virtual address 0
* or 0xffff0000.  We should always try the high mapping.
*/
init_maps->physical   = virt_to_phys(init_maps);
init_maps->virtual    = vectors_base();
init_maps->length     = PAGE_SIZE;
init_maps->domain     = DOMAIN_USER;
init_maps->prot_read  = 0;
init_maps->prot_write = 0;
init_maps->cacheable  = 1;
init_maps->bufferable = 0;


create_mapping(init_maps);
}
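For a hypothetical board with a single 64MB bank at physical 0x30000000 (and PAGE_OFFSET at 0xC0000000), the bank loop above would fill in one descriptor like this; the values are illustrative, and the fields are the ones assigned in the loop:

/* Illustrative map_desc as memtable_init() would build it for an
 * assumed 64MB bank at 0x30000000; not taken from any real board file. */
struct map_desc example_bank = {
    .physical   = 0x30000000,        /* mi->bank[0].start        */
    .virtual    = 0xC0000000,        /* __phys_to_virt(physical) */
    .length     = 64 << 20,          /* mi->bank[0].size         */
    .domain     = DOMAIN_KERNEL,
    .prot_read  = 0,
    .prot_write = 1,
    .cacheable  = 1,
    .bufferable = 1,
};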


/*
 * Create the architecture specific mappings
 */
void __init iotable_init(struct map_desc *io_desc)
{
int i;


for (i = 0; io_desc[i].last == 0; i++)
create_mapping(io_desc + i);
}


static inline void free_memmap(int node, unsigned long start, unsigned long end)
{
unsigned long pg, pgend;


start = __phys_to_virt(start);
end   = __phys_to_virt(end);


pg    = PAGE_ALIGN((unsigned long)(virt_to_page(start)));
pgend = ((unsigned long)(virt_to_page(end))) & PAGE_MASK;


start = __virt_to_phys(pg);
end   = __virt_to_phys(pgend);


free_bootmem_node(NODE_DATA(node), start, end - start);
}


static inline void free_unused_memmap_node(int node, struct meminfo *mi)
{
unsigned long bank_start, prev_bank_end = 0;
unsigned int i;


/*
* [FIXME] This relies on each bank being in address order.  This
* may not be the case, especially if the user has provided the
* information on the command line.
*/
for (i = 0; i < mi->nr_banks; i++) {
if (mi->bank[i].size == 0 || mi->bank[i].node != node)
continue;


bank_start = mi->bank[i].start & PAGE_MASK;


/*
* If we had a previous bank, and there is a space
* between the current bank and the previous, free it.
*/
if (prev_bank_end && prev_bank_end != bank_start)
free_memmap(node, prev_bank_end, bank_start);


prev_bank_end = PAGE_ALIGN(mi->bank[i].start +
  mi->bank[i].size);
}
}


/*
 * The mem_map array can get very big.  Free
 * the unused area of the memory map.
 */
void __init create_memmap_holes(struct meminfo *mi)
{
int node;


for (node = 0; node < numnodes; node++)
free_unused_memmap_node(node, mi);
}


/*
 * PTE table allocation cache.
 *
 * This is a move away from our custom 2K page allocator.  We now use the
 * slab cache to keep track of these objects.
 *
 * With this, it is questionable as to whether the PGT cache gains us
 * anything.  We may be better off dropping the PTE stuff from our PGT
 * cache implementation.
 */
kmem_cache_t *pte_cache;


/*
 * The constructor gets called for each object within the cache when the
 * cache page is created.  Note that if slab tries to misalign the blocks,
 * we BUG() loudly.
 */
static void pte_cache_ctor(void *pte, kmem_cache_t *cache, unsigned long flags)
{
unsigned long block = (unsigned long)pte;


if (block & 2047)
BUG();


memzero(pte, 2 * PTRS_PER_PTE * sizeof(pte_t));
cpu_cache_clean_invalidate_range(block, block +
PTRS_PER_PTE * sizeof(pte_t), 0);
}


void __init pgtable_cache_init(void)
{
pte_cache = kmem_cache_create("pte-cache",
2 * PTRS_PER_PTE * sizeof(pte_t), 0, 0,
pte_cache_ctor, NULL);
if (!pte_cache)
BUG();
}
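The object size here works out to 2KB, assuming the 2.4 ARM values: PTRS_PER_PTE is 256 (one entry per 4KB page of a 1MB range), so 2 * 256 * 4 bytes covers the Linux table plus the hardware table, and the "block & 2047" check in the constructor enforces the 2KB alignment the pair needs:

/* Sketch: pte-cache object size (assumed 2.4 ARM values). */
enum {
    PTE_ENTRIES   = 256,                /* PTRS_PER_PTE: 1MB / 4KB      */
    HW_PTE_BYTES  = PTE_ENTRIES * 4,    /* 1KB hardware table           */
    PTE_OBJ_BYTES = 2 * HW_PTE_BYTES,   /* 2KB: Linux + hardware tables */
};
/* (block & (PTE_OBJ_BYTES - 1)) == 0 is what pte_cache_ctor() demands. */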
/*****************************************************************************************************************************************/
/* init.c */
 /* Initialise the page tables; this function finishes off the page-table setup */
void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
{
void *zero_page;
int node;


memcpy(&meminfo, mi, sizeof(meminfo));


/*
* allocate the zero page.  Note that we count on this going ok.
*/
zero_page = alloc_bootmem_low_pages(PAGE_SIZE);


/*
* initialise the page tables.
*/
memtable_init(mi); /* page-table initialisation: builds the page tables for all physical memory in ZONE_DMA and ZONE_NORMAL */
if (mdesc->map_io)
mdesc->map_io();
flush_cache_all();
flush_tlb_all();


/*
* initialise the zones within each node
*/
for (node = 0; node < numnodes; node++) {
unsigned long zone_size[MAX_NR_ZONES];
unsigned long zhole_size[MAX_NR_ZONES];
struct bootmem_data *bdata;
pg_data_t *pgdat;
int i;


/*
* Initialise the zone size information.
*/
for (i = 0; i < MAX_NR_ZONES; i++) {
zone_size[i]  = 0;
zhole_size[i] = 0;
}


pgdat = NODE_DATA(node);
bdata = pgdat->bdata;


/*
* The size of this node has already been determined.
* If we need to do anything fancy with the allocation
* of this memory to the zones, now is the time to do
* it.
*/
zone_size[0] = bdata->node_low_pfn -
(bdata->node_boot_start >> PAGE_SHIFT);


/*
* If this zone has zero size, skip it.
*/
if (!zone_size[0])
continue;


/*
* For each bank in this node, calculate the size of the
* holes.  holes = node_size - sum(bank_sizes_in_node)
*/
zhole_size[0] = zone_size[0];
for (i = 0; i < mi->nr_banks; i++) {
if (mi->bank[i].node != node)
continue;


zhole_size[0] -= mi->bank[i].size >> PAGE_SHIFT;
}


/*
* Adjust the sizes according to any special
* requirements for this machine type.
*/
arch_adjust_zones(node, zone_size, zhole_size);


free_area_init_node(node, pgdat, 0, zone_size,
bdata->node_boot_start, zhole_size);
}


/*
* finish off the bad pages once
* the mem_map is initialised
*/
memzero(zero_page, PAGE_SIZE);
empty_zero_page = virt_to_page(zero_page);
flush_dcache_page(empty_zero_page);
}
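To see the zone/hole arithmetic with concrete (invented) numbers: a 64MB node starting at 0x30000000 made of two contiguous 32MB banks has zone_size[0] = 16384 pages and no holes; a gap between the banks would surface as a nonzero zhole_size[0]:

/* Sketch of paging_init()'s zone arithmetic; all numbers invented,
 * 4KB pages assumed (PAGE_SHIFT = 12). */
static void zone_arithmetic_example(void)
{
    unsigned long node_boot_start = 0x30000000UL;
    unsigned long node_low_pfn    = (node_boot_start + (64UL << 20)) >> 12;

    unsigned long zone_size  = node_low_pfn - (node_boot_start >> 12);
    unsigned long zhole_size = zone_size
                             - ((32UL << 20) >> 12)    /* bank 0 */
                             - ((32UL << 20) >> 12);   /* bank 1 */

    /* zone_size == 16384 pages (64MB), zhole_size == 0 */
    (void)zone_size; (void)zhole_size;
}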




