2010年(130)
分类: LINUX
2010-01-10 17:16:23
浅析armlinux-sp的孵化流程,1号内核线程init的创建
文章来源:http://gliethttp.cublog.cn
接续上一篇《浅析armlinux-sp进程切换栈结构和切换函数__switch_to()》,研究一下sp从内核系统启动到内核线程init启动的变化过程.
当系统启动的时候,它运行在核心态,这时,系统中只有一个进程:初始化进程(init_task).像所有其它进程一样,初始化进程有一个由堆栈、寄存器等表示的
机器状态(TSS).当系统中其它进程运行时,这些信息保存在初始化进程的task_struct数据结构中.在系统初始化结束时,初始化进程创建并启动一个核心线程(init),
然后自己进入空循环(idle).当系统中没有其它可以运行的进程时,调度程序会运行这个空闲进程.这个空闲进程的task_struct,即:init_task_union,是唯一一个不是动态分配,而是
在内核链接时静态定义的结构,为了不至于混淆,该进程叫做init_task.
空闲进程init_task的进程标识符pid是0,核心进程init的进程标识符pid是1.init是系统中第一个真正的进程,它执行一些系统初始化设置.
-----------------------------------------------------------------------
1.arch/arm/kernel/head-armv.S
//init_task的静态栈空间
...
/*
 * Table of words consumed by __mmap_switched. The first word is the address
 * of __mmap_switched itself; the six words after it are loaded, in order,
 * into r4, r5, r6, r7, r8 and sp by the ldmia at the top of __mmap_switched.
 */
.type __switch_data, %object
__switch_data: .long __mmap_switched
.long SYMBOL_NAME(__bss_start) @ -> r4
.long SYMBOL_NAME(_end) @ -> r5
.long SYMBOL_NAME(processor_id) @ -> r6
.long SYMBOL_NAME(__machine_arch_type) @ -> r7
.long SYMBOL_NAME(cr_alignment) @ -> r8
.long SYMBOL_NAME(init_task_union)+8192 @ -> sp: 8192 bytes past the start of init_task_union
...
.align 5
/*
 * __mmap_switched: loads r4-r8 and sp from the words that follow the first
 * entry of __switch_data, zero-fills the range [r4, r5), stores r9 and r1
 * through r6 and r7, stores two control register values through r8, and
 * then branches to start_kernel.
 */
__mmap_switched:
adr r3, __switch_data + 4 @ skip the first word (address of __mmap_switched)
ldmia r3, {r4, r5, r6, r7, r8, sp}@ r2 = compat
@ sp = stack pointer
mov fp, #0 @ Clear BSS (and zero fp)
1: cmp r4, r5
strcc fp, [r4],#4 @ while r4 < r5: store zero at [r4], then advance r4 by 4
bcc 1b
str r9, [r6] @ Save processor ID
str r1, [r7] @ Save machine type
#ifdef CONFIG_ALIGNMENT_TRAP
orr r0, r0, #2 @ ...........A.
#endif
bic r2, r0, #2 @ Clear 'A' bit
stmia r8, {r0, r2} @ Save control register values
b SYMBOL_NAME(start_kernel)
...
-----------------------------------------------------------------------
2.反汇编__switch_data数据
c0008038 <__switch_data>:
c0008038: c0008080 andgt r8, r0, r0, lsl #1
c000803c: c0120660 andgts r0, r2, r0, ror #12
c0008040: c01415c8 andgts r1, r4, r8, asr #11
c0008044: c01213b8 ldrgth r1, [r2], -r8
c0008048: c01213ac andgts r1, r2, ip, lsr #7
c000804c: c0118c4c andgts r8, r1, ip, asr #24
c0008050: c0118000 andgts r8, r1, r0
//sp=c0118000,内核空间,启动时,暂时使用,后由init进程使用[gliethttp]
//arm入栈方式:sp先sp-4,之后把数据推入sp-4中
-----------------------------------------------------------------------
3.arch/arm/kernel/Init_task.c
/*
 * Statically defined task union for the initial task, placed in the
 * ".init.task" section and initialised with INIT_TASK().
 */
union task_union init_task_union __attribute__((__section__(".init.task"))) =
{ INIT_TASK(init_task_union.task) };
-----------------------
/* Size of the task union in bytes unless overridden: 2048 longs (8 KiB when long is 4 bytes). */
#ifndef INIT_TASK_SIZE
# define INIT_TASK_SIZE 2048*sizeof(long)
#endif
union task_union { // author's note: simply tells the compiler to reserve a fixed 8 KiB region for the kernel's use
struct task_struct task; // author's note: task_struct occupies the low addresses, the stack the high addresses
unsigned long stack[INIT_TASK_SIZE/sizeof(long)];
};
-----------------------
在arch/arm/vmlinux-armv.lds.in
...
. = ALIGN(8192); /* 8 KiB alignment */
.data : {
/*
* first, the init task union, aligned
* to an 8192 byte boundary.
*/
*(.init.task) /* 8 KiB aligned [gliethttp 2007-07-18] */
/*
* then the cacheline aligned data
*/
. = ALIGN(32);
*(.data.cacheline_aligned)
/*
* and the usual data section
*/
*(.data)
CONSTRUCTORS
_edata = .;
}
...
-----------------------------------------------------------------------
4.反汇编init_task_union,以下数值是由INIT_TASK(init_task_union.task)静态编译生成
c0116000 <init_task_union>:
...
c0116010: c0118ee0 andgts r8, r1, r0, ror #29
...
c011601c: ffffffff swinv 0x00ffffff
c0116020: 0000000a andeq r0, r0, sl
...
c0116034: ffffffff swinv 0x00ffffff
c0116038: ffffffff swinv 0x00ffffff
c011603c: c011603c andgts r6, r1, ip, lsr r0
c0116040: c011603c andgts r6, r1, ip, lsr r0
c0116044: 00000000 andeq r0, r0, r0
c0116048: c0116000 andgts r6, r1, r0
c011604c: c0116000 andgts r6, r1, r0
c0116050: c01189ac andgts r8, r1, ip, lsr #19
...
c0116094: c0116000 andgts r6, r1, r0
c0116098: c0116000 andgts r6, r1, r0
...
c01160a8: c01160a8 andgts r6, r1, r8, lsr #1
c01160ac: c01160a8 andgts r6, r1, r8, lsr #1
...
c01160bc: c01160bc ldrgth r6, [r1], -ip
c01160c0: c01160bc ldrgth r6, [r1], -ip
...
c01160f4: c0025b1c andgt r5, r2, ip, lsl fp
...
c01161d4: fffffeff swinv 0x00fffeff
c01161d8: 00000000 andeq r0, r0, r0
c01161dc: ffffffff swinv 0x00ffffff
c01161e0: 00000000 andeq r0, r0, r0
c01161e4: c0119d08 andgts r9, r1, r8, lsl #26
c01161e8: ffffffff swinv 0x00ffffff
c01161ec: ffffffff swinv 0x00ffffff
c01161f0: ffffffff swinv 0x00ffffff
c01161f4: ffffffff swinv 0x00ffffff
c01161f8: ffffffff swinv 0x00ffffff
c01161fc: ffffffff swinv 0x00ffffff
c0116200: 00800000 addeq r0, r0, r0
c0116204: ffffffff swinv 0x00ffffff
c0116208: 00000000 andeq r0, r0, r0
c011620c: ffffffff swinv 0x00ffffff
c0116210: ffffffff swinv 0x00ffffff
c0116214: ffffffff swinv 0x00ffffff
...
c0116220: 00000400 andeq r0, r0, r0, lsl #8
c0116224: 00000400 andeq r0, r0, r0, lsl #8
c0116228: ffffffff swinv 0x00ffffff
c011622c: ffffffff swinv 0x00ffffff
c0116230: ffffffff swinv 0x00ffffff
c0116234: ffffffff swinv 0x00ffffff
c0116238: ffffffff swinv 0x00ffffff
c011623c: ffffffff swinv 0x00ffffff
c0116240: 77730000 ldrvcb r0, [r3, -r0]!
c0116244: 65707061 ldrvsb r7, [r0, -#97]!
c0116248: 00000072 andeq r0, r0, r2, ror r0
...
c011626c: 00000001 andeq r0, r0, r1
...
c0116320: 0000001d andeq r0, r0, sp, lsl r0
c0116324: c01182e0 andgts r8, r1, r0, ror #5
c0116328: c0118304 andgts r8, r1, r4, lsl #6
...
c0116334: c01184a4 andgts r8, r1, r4, lsr #9
...
c0116344: c0116340 andgts r6, r1, r0, asr #6
...
c0118000 <runqueue_lock>:
-----------------------------------------------------------------------
综上可知start_kernel函数中使用的sp值[栈顶]为内核空间的c0118000[我的at91rm9200板sdram起始地址为0x20000000,head-armv.S前4M虚拟内存映射之后,物理内存20118000对应内核虚拟内存c0118000],
看看init线程的创建
init/Main.c->rest_init()->kernel/Fork.c->kernel_thread()->arch/arm/kernel/Process.c->arch_kernel_thread()->arch/arm/kernel/Sys_arm.c->sys_clone();
/*
 * Creates the init kernel thread, releases the kernel lock, marks the
 * current task as needing a reschedule and then enters cpu_idle().
 */
static void rest_init(void)
{
kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);// create the init kernel thread
unlock_kernel();
current->need_resched = 1;
cpu_idle();
}
/*
 * kernel_thread - create a kernel thread through arch_kernel_thread().
 *
 * fn:    function passed on to arch_kernel_thread()
 * arg:   argument passed on to arch_kernel_thread()
 * flags: clone flags passed on to arch_kernel_thread()
 *
 * Returns -EPERM when the current task has ptrace set. Otherwise it clears
 * task_dumpable before the arch_kernel_thread() call, restores the saved
 * value afterwards and returns whatever arch_kernel_thread() returned.
 */
long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
{
struct task_struct *task = current;
unsigned old_task_dumpable;
long ret;
/* lock out any potential ptracer */
task_lock(task);
if (task->ptrace) {
task_unlock(task);
return -EPERM;
}
old_task_dumpable = task->task_dumpable;
task->task_dumpable = 0;
task_unlock(task);
ret = arch_kernel_thread(fn, arg, flags);
// author's note: this is the only call site of arch_kernel_thread; similar in nature to OSStart() in uC/OS-II.
/* never reached in child process, only in parent */
current->task_dumpable = old_task_dumpable;
return ret;
}
/*
 * arch_kernel_thread - ARM back end of kernel_thread().
 *
 * fn:    function the new thread runs
 * arg:   argument handed to fn
 * flags: clone flags; CLONE_VM is OR-ed in before the system call
 *
 * Issues the clone system call with (flags | CLONE_VM) in r0 and 0 in r1.
 * When the call leaves a non-zero value in r0, that value is returned to the
 * caller. When it leaves zero (the child), fp is cleared, fn(arg) is called
 * with lr set up for the return, and control then branches to sys_exit.
 *
 * Author's note for the init thread created from rest_init()
 * [2007-07-18 gliethttp]:
 *   r0 = fn    = init
 *   r1 = arg   = NULL
 *   r2 = flags = 0x00010e00 = CLONE_FS | CLONE_FILES | CLONE_SIGNAL
 */
pid_t arch_kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
	pid_t __ret;

	/*
	 * The annotation on the syscall line lives in an '@' assembler
	 * comment inside the template: text placed after the trailing
	 * backslash would break the string literal.
	 */
	__asm__ __volatile__(
	"orr	r0, %1, %2	@ kernel_thread sys_clone	\n\
	mov	r1, #0						\n\
	"__syscall(clone)"	@ raise the swi, runs sys_clone	\n\
	movs	%0, r0		@ if we are the child		\n\
	bne	1f						\n\
	mov	fp, #0		@ ensure that fp is zero	\n\
	mov	r0, %4						\n\
	mov	lr, pc						\n\
	mov	pc, %3						\n\
	b	sys_exit					\n\
1:	"
	: "=&r" (__ret)
	: "Ir" (flags), "I" (CLONE_VM), "r" (fn), "r" (arg)
	: "r0", "r1", "lr");

	return __ret;
}
-----------------------
arch/arm/kernel/entry-common.S
sys_clone_wapper:
	add	r2, sp, #S_OFF			@ r2 = sp + S_OFF (S_OFF = 8 per the author [gliethttp]): start of the saved pt_regs
	b	SYMBOL_NAME(sys_clone)		@ author's note: when the init thread is created this sp lies inside init_task_union
-----------------------
arch/arm/kernel/entry-common.S
...
.align 5
/*
 * vector_swi (excerpt): saves the caller's registers with save_user_regs,
 * derives the system call number in scno and, when syscall tracing is off
 * and scno is below NR_syscalls, loads pc from sys_call_table[scno] with lr
 * preset to ret_fast_syscall.
 */
ENTRY(vector_swi)
save_user_regs
zero_fp
get_scno
// author's note: get_scno does "ldr r7, [lr, #-4]", loading the trapping instruction (e.g. swi 0x0090000b, machine code 0xef90000b) into r7, so r7 = 0xef90000b here [gliethttp]
arm710_bug_check scno, ip
#ifdef CONFIG_ALIGNMENT_TRAP
ldr ip, __cr_alignment
ldr ip, [ip]
mcr p15, 0, ip, c1, c0 @ update control register
#endif
enable_irq ip
str r4, [sp, #-S_OFF]!
// stores r4 at [sp - S_OFF] and writes sp = sp - S_OFF back (S_OFF = 8 per the author), leaving an 8-byte gap below the saved frame [2007-07-19 gliethttp]
get_current_task tsk
ldr ip, [tsk, #TSK_PTRACE] @ check for syscall tracing
bic scno, scno, #0xff000000 @ mask off SWI op-code
eor scno, scno, #OS_NUMBER << 20 @ check OS number
adr tbl, sys_call_table @ load syscall table pointer
tst ip, #PT_TRACESYS @ are we tracing syscalls?
bne __sys_trace
adrsvc al, lr, ret_fast_syscall @ return address
cmp scno, #NR_syscalls @ check upper syscall limit
ldrcc pc, [tbl, scno, lsl #2]
// jumps to the swi handler function without pushing anything onto sp [gliethttp 2007-07-18]
-----------------------
arch/arm/kernel/entry-header.S
/*
 * save_user_regs: reserves S_FRAME_SIZE bytes on the stack and fills the
 * frame with r0-r12, sp, lr, the return pc, the spsr and a second copy of r0.
 */
.macro save_user_regs
sub sp, sp, #S_FRAME_SIZE
// sizeof(struct pt_regs) = 18*4 = 72 = S_FRAME_SIZE
stmia sp, {r0 - r12}
// stores r0-r12 in ascending order into word slots 0..12 starting at [sp]; sp itself is unchanged
add r8, sp, #S_PC
// r8 = sp + S_PC = sp + 60
stmdb r8, {sp, lr}^ // stores lr at r8-4 and sp at r8-8
mrs r8, spsr // hold the spsr in r8 temporarily
str lr, [sp, #S_PC] // store lr at sp + S_PC
str r8, [sp, #S_PSR] // store the spsr at sp + S_PSR
str r0, [sp, #S_OLD_R0] // store r0 at sp + S_OLD_R0
.endm
-----------------------
/*
 * sys_clone - clone system call: picks the child's stack pointer and
 * forwards to do_fork().
 *
 * clone_flags: clone flags, passed through to do_fork() unchanged
 * newsp:       stack pointer for the child; 0 means "use the one saved in regs"
 * regs:        register frame saved on system call entry
 *
 * Returns the result of do_fork().
 *
 * Author's note for the init thread [2007-07-18 gliethttp]:
 *   r0 = clone_flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGNAL
 *   r1 = newsp = 0
 *   r2 = regs, pointing at the pt_regs frame stored on the stack; for the
 *        init thread both that stack and regs lie inside init_task_union.
 */
asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, struct pt_regs *regs)
{
	/* regs->ARM_sp is regs->uregs[13]; it is only read when newsp is zero. */
	unsigned long child_sp = newsp ? newsp : regs->ARM_sp;

	return do_fork(clone_flags, child_sp, regs, 0);
}
-----------------------------------------------------------------------
include/asm-arm/proc-armv/Ptrace.h定义了regs结构
/*
 * Saved register frame: 18 long-sized slots, accessed through the ARM_*
 * index macros below.
 */
struct pt_regs {
long uregs[18];
};
/* Slot indices into pt_regs.uregs[]: r0..r10, fp, ip, sp, lr, pc, cpsr, then the original r0. */
#define ARM_cpsr uregs[16]
#define ARM_pc uregs[15]
#define ARM_lr uregs[14]
#define ARM_sp uregs[13] /* read by sys_clone as the child stack pointer when newsp is 0 */
#define ARM_ip uregs[12]
#define ARM_fp uregs[11]
#define ARM_r10 uregs[10]
#define ARM_r9 uregs[9]
#define ARM_r8 uregs[8]
#define ARM_r7 uregs[7]
#define ARM_r6 uregs[6]
#define ARM_r5 uregs[5]
#define ARM_r4 uregs[4]
#define ARM_r3 uregs[3]
#define ARM_r2 uregs[2]
#define ARM_r1 uregs[1]
#define ARM_r0 uregs[0]
#define ARM_ORIG_r0 uregs[17]
-----------------------------------------------------------------------
rest_init()->kernel_thread()->arch_kernel_thread()->sys_clone()->do_fork()->copy_thread()
/*
 * copy_thread - build the initial saved state of a newly forked task.
 *
 * Copies the parent's register frame to the top of the child's 8192-byte
 * task area, sets the child's r0 to 0 and its sp to esp, places an
 * INIT_CSS-initialised context_save_struct directly below that frame with
 * ret_from_fork OR-ed into its pc, and records it in p->thread.save.
 * nr, clone_flags and unused are not used in this body. Always returns 0.
 */
int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
unsigned long unused, struct task_struct * p, struct pt_regs * regs)
{
struct pt_regs *childregs;
struct context_save_struct * save;
atomic_set(&p->thread.refcount, 1);
childregs = ((struct pt_regs *)((unsigned long)p + 8192 - 8)) - 1;
// i.e. (char *)p + 8192 - 8 - sizeof(struct pt_regs); pt_regs is 18 longs (18*4 bytes per the author) [gliethttp]
*childregs = *regs;
childregs->ARM_r0 = 0;
childregs->ARM_sp = esp;
save = ((struct context_save_struct *)(childregs)) - 1;
*save = INIT_CSS; // author's note: default cpsr, r4, r5, r6, r7, r8, r9, sl, fp, pc values for the new thread
save->pc |= (unsigned long)ret_from_fork; // author's note: the function the new thread returns through
p->thread.save = save; // author's note: used by __switch_to when switching tasks
return 0;
}
-----------------------------------------------------------------------
系统调用swi返回
arch/arm/kernel/entry-common.S
...
.align 5
/*
* This is the fast syscall return path. We do as little as
* possible here, and this includes saving r0 back into the SVC
* stack.
*/
ret_fast_syscall:
disable_irq r1 @ ensure IRQs are disabled
ldr r1, [tsk, #TSK_NEED_RESCHED]
ldr r2, [tsk, #TSK_SIGPENDING]
teq r1, #0 @ need_resched || sigpending
teqeq r2, #0
bne slow @ either value is non-zero: leave the fast path
fast_restore_user_regs @ otherwise restore the saved registers and return
...
-----------------------
arch/arm/kernel/entry-header.S
/*
* fast_restore_user_regs: reloads the saved cpsr and pc from the frame at
* sp + S_OFF, restores r1-lr from that frame, pops the frame and returns.
* Must be called with IRQs already disabled.
*/
.macro fast_restore_user_regs
ldr r1, [sp, #S_OFF + S_PSR] @ get calling cpsr
ldr lr, [sp, #S_OFF + S_PC]! @ get pc
msr spsr, r1 @ save in spsr_svc
ldmdb sp, {r1 - lr}^ // restore the registers
mov r0, r0
add sp, sp, #S_FRAME_SIZE - S_PC
movs pc, lr // return to where the system call was made and continue execution
.endm
能力所限,就到这里了,后边的继续研究.另外用户栈的创建,粗略的过一下,免得忘了:
-----------------------------------------------------------------------
########
摘:
将参数栈加入到进程的虚拟地址空间。
进程的用户栈的栈底在0xC0000000(3G)处,用户栈从此处开始向下增长。进程的参数栈在它的参数数据结构linux_binprm的page表中,而且参数所占用的物理页已经分配。因为目前要放入用户栈中的只有在参数栈中的进程运行所需要的参数,而且用户栈的大小也无法预先确定,所以,此处先根据参数栈的大小建立用户栈。如果进程在以后的运行中需要更大的栈空间(PUSH操作时,栈指针esp越界),处理器会产生page fault异常,系统中的page fault异常处理程序do_page_fault会向下扩展用户栈空间。
根据参数栈的大小建立一个内存区域数据结构vm_area_struct,将已分配的参数页逐个加入到进程的页目录和页表中。
◆申请一块内存用来建立vm_area_struct数据结构,该内存区域的地址范围是[3G - 参数栈大小,3G],其标志为VM_STACK_FLAGS(向下增长,可读、写、执行、共享等),该区域没有规定操作,没有对应的文件;
◆因为当前页目录中用户页表部分全为空,所以根据用户参数栈各页的虚拟地址首先分配相应的页表,而后将各参数栈页对应的页表项设为脏页,填入相应的页表中;
◆设置当前进程用户栈的开始位置为当前的栈顶:
mm->start_stack。
########
load_elf_binary()->setup_arg_pages(bprm)
/*
 * setup_arg_pages - place the argument pages of a new program at the top of
 * the user stack.
 *
 * bprm: binary-format parameters; its p, loader and exec fields are rebased
 *       by stack_base, and every non-NULL entry of bprm->page[] is handed to
 *       put_dirty_page() and then cleared.
 *
 * A vm_area_struct covering [PAGE_MASK & bprm->p, STACK_TOP) is allocated,
 * flagged VM_STACK_FLAGS and inserted into current->mm while holding
 * mmap_sem for writing.
 *
 * Returns 0 on success, or -ENOMEM when the vm_area_struct cannot be
 * allocated.
 */
int setup_arg_pages(struct linux_binprm *bprm)
{
	unsigned long stack_base;
	struct vm_area_struct *mpnt;
	int i;

	/*
	 * Author's note: STACK_TOP = 0xC0000000 = 3G, defined as:
	 *
	 * #ifdef __KERNEL__
	 * #define STACK_TOP ((current->personality == PER_LINUX_32BIT) ? TASK_SIZE : TASK_SIZE_26)
	 * #endif
	 */
	stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;

	/* Rebase the offsets kept in bprm onto the final stack location. */
	bprm->p += stack_base;
	if (bprm->loader)
		bprm->loader += stack_base;
	bprm->exec += stack_base;

	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!mpnt)
		return -ENOMEM;

	down_write(&current->mm->mmap_sem);
	{
		mpnt->vm_mm = current->mm;
		mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
		mpnt->vm_end = STACK_TOP;		/* 0xC0000000 = 3G (author's note) */
		mpnt->vm_page_prot = PAGE_COPY;
		mpnt->vm_flags = VM_STACK_FLAGS;	/* mark this VMA with VM_STACK_FLAGS */
		mpnt->vm_ops = NULL;
		mpnt->vm_pgoff = 0;
		mpnt->vm_file = NULL;
		mpnt->vm_private_data = (void *) 0;
		insert_vm_struct(current->mm, mpnt);
		current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
	}

	/* Hand every populated argument page over, one PAGE_SIZE slot at a time. */
	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
		struct page *page = bprm->page[i];
		if (page) {
			bprm->page[i] = NULL;
			put_dirty_page(current,page,stack_base);
		}
		stack_base += PAGE_SIZE;
	}
	up_write(&current->mm->mmap_sem);

	return 0;
}
-----------------------------------------------------------------------
load_elf_binary()->setup_arg_pages(bprm)->start_thread(regs, elf_entry, bprm->p)
/*
 * start_thread(regs, pc, sp) - prepare a register frame for a fresh
 * user-mode program.
 *
 * Switches the address limit to USER_DS, zeroes regs->uregs, selects
 * USR_MODE or USR26_MODE for the cpsr depending on ADDR_LIMIT_32BIT in
 * current->personality, sets pc and sp, and loads r0-r2 from the first three
 * words at sp (argc, argv, envp per the inline comments).
 *
 * Every line inside the macro must end in a backslash, so annotations are
 * written as block comments placed before the continuation.
 */
#define start_thread(regs,pc,sp)					\
({									\
	unsigned long *stack = (unsigned long *)sp;			\
	set_fs(USER_DS);						\
	memzero(regs->uregs, sizeof(regs->uregs));			\
	if (current->personality & ADDR_LIMIT_32BIT)			\
		regs->ARM_cpsr = USR_MODE;				\
	else								\
		regs->ARM_cpsr = USR26_MODE;				\
	regs->ARM_pc = pc;	/* pc */				\
	/* author's note: the values placed in regs take effect when the system call returns */ \
	regs->ARM_sp = sp;	/* sp */				\
	regs->ARM_r2 = stack[2];	/* r2 (envp) */			\
	regs->ARM_r1 = stack[1];	/* r1 (argv) */			\
	regs->ARM_r0 = stack[0];	/* r0 (argc) */			\
})
完.