based on linux-3.0 kernel
---------------------------
1. kernel使用swapper_pg_dir 作为PGD表基址,应用程序使用mm->pgd保存PGD表基址。
2. 应用程序PGD表基址创建路径:copy_mm->dup_mm->mm_init->mm_alloc_pgd->pgd_alloc.
pgd_alloc.
arch/powerpc/mm/pgtable_32.c:
- pgd_t *pgd_alloc(struct mm_struct *mm)
-
{
-
pgd_t *ret;
-
-
/* pgdir take page or two with 4K pages and a page fraction otherwise */
-
#ifndef CONFIG_PPC_4K_PAGES
-
ret = kzalloc(1 << PGDIR_ORDER, GFP_KERNEL);
-
#else
-
ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
-
PGDIR_ORDER - PAGE_SHIFT);
-
#endif
-
return ret;
-
}
3. 进程创建PGD表后,并没有完成PTE表的map, 一来因为加快创建速度,二来是因为创建时还不知道需要初始化哪些PTE表项.
4. 当进程使用的虚地址没有在TLB0中命中时, 将产生ITLB(instruction TLB)或者DTLB(Data TLB) Error Interrupt, 中断处理程序会搜索PTE表,查找相应的ENTRY, 如果没有找到则创建该PTE表项。
5. 当第一次访问使用页式映射的地址时,会进入DSI(data storage interrupt)异常或者ISI(instruction storage interrupt),在异常处理程序中创建PTE表的相应entry, 因为此时在TLB0中不会有该页面对应的信息。
arch/powerpc/kernel/head_fsl_booke.S:
handle_page_fault->do_page_fault->handle_mm_fault->handle_pte_fault
6. Data TLB Error Interrupt处理(instruction TLB error interrupt处理类似):
中断时,硬件自动保存下列寄存器:
SRR0:保存被中断指令的有效地址,以便异常处理结束后返回
SRR1: 中断时的MSR值,用来恢复现场
MSR: CE,ME,DE位不变,其它清0
DEAR: 保存引发此异常的数据有效地址
ESR: 异常的原因
MASn: TLB相关信息(MAS即MMU Assist Registers)
入口地址设置:
arch/powerpc/kernel/head_fsl_booke.S:
-> SET_IVOR(13, DataTLBError);
DataTLBError:
- /* Data TLB Error Interrupt */
-
START_EXCEPTION(DataTLBError) //定义DataTLBError 标记
- /*
-
* Exception vectors.
-
*/
-
#define START_EXCEPTION(label) \
-
.align 5; \ //2^5=32, 即32字节对齐
-
label:
-
mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
-
mfspr r10, SPRN_SPRG_THREAD
-
stw r11, THREAD_NORMSAVE(0)(r10)
-
stw r12, THREAD_NORMSAVE(1)(r10)
-
stw r13, THREAD_NORMSAVE(2)(r10)
-
mfcr r13
-
stw r13, THREAD_NORMSAVE(3)(r10) //保存r11-r13寄存器到struct thread_struct->normsave
-
DO_KVM BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1 //KVM钩子宏(定义见内核kvm_booke_hv_asm.h): 仅在配置了CONFIG_KVM_BOOKE_HV时展开, 若中断发生在KVM guest中则跳转到KVM对应的处理入口, 否则继续向下执行
-
mfspr r10, SPRN_DEAR /* Get faulting address */
-
-
/* If we are faulting a kernel address, we have to use the
-
* kernel page tables.
-
*/
-
lis r11, PAGE_OFFSET@h
-
cmplw 5, r10, r11 //r11=0xc000 0000, r10为引起中断的有效地址
-
blt 5, 3f //如果小于,则表明是用户进程地址,跳到3f处执行
-
lis r11, swapper_pg_dir@h //加载内核空间的PGD表基址
-
ori r11, r11, swapper_pg_dir@l
-
-
mfspr r12,SPRN_MAS1 /* Set TID to 0 */
-
rlwinm r12,r12,0,16,1
-
mtspr SPRN_MAS1,r12
-
-
b 4f
-
/* Get the PGD for the current thread */
-
3:
-
mfspr r11,SPRN_SPRG_THREAD //struct thread_struct基址
-
lwz r11,PGDIR(r11)
-
-
4:
-
/* Mask of required permission bits. Note that while we
-
* do copy ESR:ST to _PAGE_RW position as trying to write
-
* to an RO page is pretty common, we don't do it with
-
* _PAGE_DIRTY. We could do it, but it's a fairly rare
-
* event so I'd rather take the overhead when it happens
-
* rather than adding an instruction here. We should measure
-
* whether the whole thing is worth it in the first place
-
* as we could avoid loading SPRN_ESR completely in the first
-
* place...
-
*
-
* TODO: Is it worth doing that mfspr & rlwimi in the first
-
* place or can we save a couple of instructions here ?
-
*/
-
mfspr r12,SPRN_ESR
-
#ifdef CONFIG_PTE_64BIT
-
li r13,_PAGE_PRESENT
-
oris r13,r13,_PAGE_ACCESSED@h
-
#else
-
li r13,_PAGE_PRESENT|_PAGE_ACCESSED
-
#endif
-
rlwimi r13,r12,11,29,29
-
-
FIND_PTE
- #define FIND_PTE \
-
rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \
-
lwz r11, 0(r11); /* Get L1 entry */ \
-
rlwinm. r12, r11, 0, 0, 19; /* Extract L2 (pte) base address */ \
-
beq 2f; /* Bail if no table */ \ //如果没有找到PTE表,则到2f处; 2f处恢复寄存器后跳到DataStorage,最终由handle_page_fault建立PTE表
-
rlwimi r12, r10, 22, 20, 29; /* Compute PTE address */ \
-
lwz r11, 0(r12); /* Get Linux PTE */ //r11保存PTE表的entry
-
#endif
-
andc. r13,r13,r11 /* Check permission */
-
-
#ifdef CONFIG_PTE_64BIT
-
#ifdef CONFIG_SMP
-
subf r13,r11,r12 /* create false data dep */
-
lwzx r13,r11,r13 /* Get upper pte bits */
-
#else
-
lwz r13,0(r12) /* Get upper pte bits */
-
#endif
-
#endif
- //页面不在物理内存(或访问权限不符),跳到2f处,在2f处会调用handle_page_fault去处理这种情况
-
bne 2f /* Bail if permission/valid mismach */
- //由于有了PTE表,此时可以更新TLB0的ENTRY,完成TLB MISS 处理
- //处理完成后,中断返回
-
/* Jump to common tlb load */
-
b finish_tlb_load
-
2: //恢复寄存器,并调用DataStorage继续处理,因为tlb miss中断并没有完成TLB的更新
-
/* The bailout. Restore registers to pre-exception conditions
-
* and call the heavyweights to help us out.
-
*/
-
/* The bailout. Restore registers to pre-exception conditions
-
* and call the heavyweights to help us out.
-
*/
-
mfspr r10, SPRN_SPRG_THREAD
-
lwz r11, THREAD_NORMSAVE(3)(r10)
-
mtcr r11
-
lwz r13, THREAD_NORMSAVE(2)(r10)
-
lwz r12, THREAD_NORMSAVE(1)(r10)
-
lwz r11, THREAD_NORMSAVE(0)(r10)
-
mfspr r10, SPRN_SPRG_RSCRATCH0
-
b DataStorage
7. DSI/ISI(Data Storage Interrupt, Instruction Storage Interrupt)中断的处理:
两种异常处理非常相似,以DSI为例.
产生原因:
a: 读写MMU产生的异常,重点处理,因为此类异常是操作系统故意设置的,以便用此异常来设置MMU表项
b: 地址空间跨越页边界,因为powerpc全用大端模式,所以一般不会发生这类情况
c: 试图改变已经被锁定的cache行,此类错误无法处理
d: lwarx, stwcx指令对cache-inhibited空间进行访问,此类错误无法处理
自动保存的寄存器:
SRR0:中断返回的地址
SRR1: MSR内容
ESR: 引发异常的条件及状态
MSR:CE,ME,DE位保留,其余位清0
DEAR: 保存引发DSI异常的数据有效地址,即对哪一个
数据进行访问引发的异常
入口:
arch/powerpc/kernel/head_fsl_booke.S
->SET_IVOR(2, DataStorage);
DataStorage:
- /* Data Storage Interrupt */
-
START_EXCEPTION(DataStorage)
-
NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_DATA_STORAGE)
- //此宏作用有两个:1. 确定中断使用的堆栈空间 2. 将中断处理程序使用的通用寄存器和状态寄存器压入中断堆栈保存,为do_IRQ函数提供运行空间
- #define NORMAL_EXCEPTION_PROLOG(ivor_nr) \
-
mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
-
mfspr r10, SPRN_SPRG_THREAD; \
-
stw r11, THREAD_NORMSAVE(0)(r10); \
-
stw r13, THREAD_NORMSAVE(2)(r10); \
-
mfcr r13; /* save CR in r13 for now */\
-
DO_KVM ivor_nr SPRN_SRR1; \ //至此: r10已存入SPRG scratch寄存器, r11、r13已存入thread_struct->normsave, CR暂存于r13
-
mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ //SPRN_SRR1保存了中断前的MSR
-
andi. r11,r11,MSR_PR; \ //检查中断前的MSR的MSR_PR位,如果为0表示在核心空间被中断了
-
mr r11, r1; \ //r1为核心态堆栈指针,将其赋给r11
-
beq 1f; \ //如果在内核空间,则到1f处, 此时r11已经指向栈顶
- /*用户空间栈顶指针计算:
- * r10==r3: 保存的是被中断进程的struct thread_struct thread地址
- * -THREAD(r10): 获得了task_struct结构体地址,该地址值为:thread结构体地址-thread元素的偏移地址
- * r11为struct thread_info地址:r11= *(task_struct地址+4),即读取task_struct->stack(偏移为THREAD_INFO)中保存的值
- * ALLOC_STACK_FRAME(r11, THREAD_SIZE); 得到栈顶指针:thread_info地址+8K,即+THREAD_SIZE
- *
- * ------------ --->high address
- *| | |
- *| stack | | task_struct
- *|-----------| 8K大小 task -------
- *|thread_info| | ------------->| |---->stack = task_struct + 4
- *------------- --->low <-------- --- | |--->struct thread_struct = r3 = r10
- * stack ------- = task_struct + THREAD
- * union thread_union {
* struct thread_info thread_info;
* unsigned long stack[THREAD_SIZE/sizeof(long)];
* };
*
- */
-
/* if from user, start at top of this thread's kernel stack */ \
-
lwz r11, THREAD_INFO-THREAD(r10); \
- #define THREAD_INFO 4 /* offsetof(struct task_struct, stack) # */
- #define THREAD 480 /* offsetof(struct task_struct, thread) # */
-
ALLOC_STACK_FRAME(r11, THREAD_SIZE); \
- #if (THREAD_SHIFT < 15)
-
#define ALLOC_STACK_FRAME(reg, val) \
-
addi reg,reg,val // use this
-
#else
-
#define ALLOC_STACK_FRAME(reg, val) \
-
addis reg,reg,val@ha; \
-
addi reg,reg,val@l
-
#endif
- //开辟堆栈空间,大小为INT_FRAME_SIZE = STACK_INT_FRAME_SIZE =
- //(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
-
1 : subi r11, r11, INT_FRAME_SIZE; /* Allocate exception frame */ \
-
stw r13, _CCR(r11); /* save various registers */ \ //保存CR(此前由mfcr暂存于r13)
-
stw r12,GPR12(r11); \ //保存r12, r9
-
stw r9,GPR9(r11); \
-
mfspr r13, SPRN_SPRG_RSCRATCH0; \ //保存r10
-
stw r13, GPR10(r11); \
-
lwz r12, THREAD_NORMSAVE(0)(r10); \ //保存r11
-
stw r12,GPR11(r11); \
-
lwz r13, THREAD_NORMSAVE(2)(r10); /* restore r13 */ \
-
mflr r10; \
-
stw r10,_LINK(r11); \ //保存lr
-
mfspr r12,SPRN_SRR0; \
-
stw r1, GPR1(r11); \ //保存中断前的r1(栈指针); 上一条指令已把中断返回地址SRR0读入r12
-
mfspr r9,SPRN_SRR1; \
-
stw r1, 0(r11); \ //把旧的栈指针存入新栈帧起始处(back chain); 上一条指令已把中断前的MSR(即SRR1)读入r9
-
mr r1, r11; \
-
rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
-
stw r0,GPR0(r11); \
-
lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \
-
addi r10, r10, STACK_FRAME_REGS_MARKER@l; \
-
stw r10, 8(r11); \
-
SAVE_4GPRS(3, r11); \ //保存gpr3-gpr8寄存器
-
SAVE_2GPRS(7, r11)
- //总结:保存好r3-r12寄存器,建立好堆栈结构,把中断处理程序之前的MSR及中断返回地址存入r9及r12
-
mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
-
stw r5,_ESR(r11)
-
mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ //保存ESR及DEAR寄存器
-
andis. r10,r5,(ESR_ILK|ESR_DLK)@h //如果发生了ILK或DLK异常,则到1f处执行CacheLockingException
-
bne 1f
-
EXC_XFER_EE_LITE(0x0300, handle_page_fault)
- #define EXC_XFER_EE_LITE(n, hdlr) \
-
EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
-
ret_from_except)
- #define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \
-
li r10,trap; \
-
stw r10,_TRAP(r11); \ //保存trap值,即0x0300+1; 最低位为1表示只保存了部分寄存器(LITE方式,未保存非易失寄存器),内核用FULL_REGS()宏检查该位
-
lis r10,msr@h; \
-
ori r10,r10,msr@l; \
-
copyee(r10, r9); \ //如果copyee=NOCOPY,则MSR的EE位保持不变,如果为COPY_EE则MSR的EE位更改为中断前的MSR的EE位,r9为中断前的MSR内容
- #define COPY_EE(d, s) rlwimi d,s,0,16,16
-
#define NOCOPY(d, s)
- //依次调用函数tfer, hdlr, ret
- bl tfer; \ // tfer=transfer_to_handler
-
.long hdlr; \ //hdlr=handle_page_fault
-
.long ret //ret = ret_from_except
- .globl transfer_to_handler
transfer_to_handler:
stw r2,GPR2(r11) //保存GPR2,即current指针
stw r12,_NIP(r11) //保存r12,即中断返回地址
stw r9,_MSR(r11) //保存进入中断前的msr
andi. r2,r9,MSR_PR //判断当前中断在用户态还是核心态
mfctr r12 //ctr->r12
mfspr r2,SPRN_XER //xer->r2
stw r12,_CTR(r11) //保存这两个寄存器的值
stw r2,_XER(r11)
mfspr r12,SPRN_SPRG_THREAD //thread_struct->r12
addi r2,r12,-THREAD //获得task_struct地址值,并存入r2中
tovirt(r2,r2) /* set r2 to current */
- #define tovirt(rd,rs) \
-
addis rd,rs,0
beq 2f /* if from user, fix up THREAD.regs */
addi r11,r1,STACK_FRAME_OVERHEAD //中断发生在用户态情况, r11 = r1+STACK_FRAME_OVERHEAD, 指向栈上保存的pt_regs
stw r11,PT_REGS(r12) //用r11值更新thread-reg参数 - //如果调试或被跟踪,则清除相关事件,并跳到3f处继续
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
/* Check to see if the dbcr0 register is set up to debug. Use the
internal debug mode bit to do this. */
lwz r12,THREAD_DBCR0(r12)
andis. r12,r12,DBCR0_IDM@h
beq+ 3f
/* From user and task is ptraced - load up global dbcr0 */
li r12,-1 /* clear all pending debug events */
mtspr SPRN_DBSR,r12
lis r11,global_dbcr0@ha
tophys(r11,r11)
addi r11,r11,global_dbcr0@l
#ifdef CONFIG_SMP
rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
lwz r9,TI_CPU(r9)
slwi r9,r9,3
add r11,r11,r9
#endif
lwz r12,0(r11)
mtspr SPRN_DBCR0,r12
lwz r12,4(r11)
addi r12,r12,-1
stw r12,4(r11)
#endif
b 3f
2: /* if from kernel, check interrupted DOZE/NAP mode and
* check for stack overflow
*/
lwz r9,KSP_LIMIT(r12)
cmplw r1,r9 /* if r1 <= ksp_limit */
ble- stack_ovf /* then the kernel stack overflowed */
5:
#if defined(CONFIG_6xx) || defined(CONFIG_E500)
rlwinm r9,r1,0,0,31-THREAD_SHIFT
tophys(r9,r9) /* check local flags */
lwz r12,TI_LOCAL_FLAGS(r9)
mtcrf 0x01,r12
bt- 31-TLF_NAPPING,4f
bt- 31-TLF_SLEEPING,7f
#endif /* CONFIG_6xx || CONFIG_E500 */
.globl transfer_to_handler_cont
transfer_to_handler_cont:
3:
mflr r9 //将返回地址赋于r9,即.long hdlr; \ //hdlr=handle_page_fault语句地址
lwz r11,0(r9) /* virtual address of handler */
lwz r9,4(r9) /* where to go when done */ //.long ret_from_except地址
#ifdef CONFIG_TRACE_IRQFLAGS
#ifdef CONFIG_BOOKE
mtmsr r10
#else
lis r12,reenable_mmu@h
ori r12,r12,reenable_mmu@l
mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r10
SYNC
RFI
reenable_mmu: /* re-enable mmu so we can */
mfmsr r10
#endif /* !CONFIG_BOOKE */
lwz r12,_MSR(r1)
xor r10,r10,r12
andi. r10,r10,MSR_EE /* Did EE change? */
beq 1f
/* Save handler and return address into the 2 unused words
* of the STACK_FRAME_OVERHEAD (sneak sneak sneak). Everything
* else can be recovered from the pt_regs except r3 which for
* normal interrupts has been set to pt_regs and for syscalls
* is an argument, so we temporarily use ORIG_GPR3 to save it
*/
stw r9,8(r1)
stw r11,12(r1)
stw r3,ORIG_GPR3(r1)
bl trace_hardirqs_off
lwz r0,GPR0(r1)
lwz r3,ORIG_GPR3(r1)
lwz r4,GPR4(r1)
lwz r5,GPR5(r1)
lwz r6,GPR6(r1)
lwz r7,GPR7(r1)
lwz r8,GPR8(r1)
lwz r9,8(r1)
lwz r11,12(r1)
1: mtctr r11
mtlr r9
bctr /* jump to handler */
#else /* CONFIG_TRACE_IRQFLAGS */
#ifdef CONFIG_BOOKE
/*
* We're not changing address space on Book E, and the
* extra rfi can hurt when virtualized -- whereas
* mtmsr can be paravirtualized.
*/
mtmsr r10
mtctr r11
mtlr r9 //把调用返回地址设为r9
bctr //此处会调用ctr内的值,即r11函数,即中断处理函数handle_page_fault执行,执行完中断处理函数后,会接着执行r9处函数,因为此时r9被设为了返回地址,即执行:ret_from_except函数
#else
mtspr SPRN_SRR0,r11
mtspr SPRN_SRR1,r10
mtlr r9
SYNC
RFI /* jump to handler, enable MMU */
#endif /* !CONFIG_BOOKE */
#endif /* CONFIG_TRACE_IRQFLAGS */
#if defined (CONFIG_6xx) || defined(CONFIG_E500)
4: rlwinm r12,r12,0,~_TLF_NAPPING
stw r12,TI_LOCAL_FLAGS(r9)
b power_save_ppc32_restore
7: rlwinm r12,r12,0,~_TLF_SLEEPING
stw r12,TI_LOCAL_FLAGS(r9)
lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */
rlwinm r9,r9,0,~MSR_EE
lwz r12,_LINK(r11) /* and return to address in LR */
b fast_exception_return
#endif
/*
* On kernel stack overflow, load up an initial stack pointer
* and call StackOverflow(regs), which should not return.
*/
stack_ovf:
/* sometimes we use a statically-allocated stack, which is OK. */
lis r12,_end@h
ori r12,r12,_end@l
cmplw r1,r12
ble 5b /* r1 <= &_end is OK */
SAVE_NVGPRS(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
lis r1,init_thread_union@ha
addi r1,r1,init_thread_union@l
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l //StackOverflow函数会调用panic,使系统结束
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
FIX_SRR1(r10,r12)
mtspr SPRN_SRR0,r9
mtspr SPRN_SRR1,r10
SYNC
RFI
-
1: //与上面类似,只是hdlr函数为CacheLockingException
-
addi r3,r1,STACK_FRAME_OVERHEAD
-
EXC_XFER_EE_LITE(0x0300, CacheLockingException)
-
阅读(4138) | 评论(0) | 转发(0) |