Chinaunix首页 | 论坛 | 博客
  • 博客访问: 259340
  • 博文数量: 130
  • 博客积分: 4012
  • 博客等级: 上校
  • 技术积分: 2030
  • 用 户 组: 普通用户
  • 注册时间: 2010-01-10 10:40
文章分类

全部博文(130)

文章存档

2010年(130)

我的朋友

分类: LINUX

2010-01-10 17:16:23

浅析armlinux-sp的孵化流程,1号内核线程init的创建

文章来源:http://gliethttp.cublog.cn

接续上一篇《浅析armlinux-sp进程切换栈结构和切换函数__switch_to()》,研究一下sp从内核系统启动到内核线程init启动的变化过程.
  当系统启动的时候,她运行在核心态,这时,系统中只有一个进程:初始化进程(init_task).象所有其它进程一样,初始化进程有一个堆栈、寄存器等表示的
机器状态(TSS).当系统中其它进程运行时,这些信息保存在初始化进程的task_struct数据结构中.在系统初始化结束时,初始化进程创建并启动一个核心线程(init),
然后自己进入空循环(idle).当系统中没有其它可以运行的进程时,调度程序会运行这个空闲进程.这个空闲进程的task_struct(即init_task_union)是唯一一个不是动态分配,而是
在内核连接时静态定义的结构,为了不至于混淆,该进程叫做init_task.
  空闲进程init_task的进程标识符pid是0,核心进程init的进程标识符pid是1.init是系统中第一个真正的进程,它执行一些系统初始化设置.
-----------------------------------------------------------------------
1.arch/arm/kernel/head-armv.S
//init_task的静态栈空间
...
        @ Table of addresses consumed by __mmap_switched. The first word is the
        @ address of __mmap_switched itself; the remaining six words are loaded,
        @ in order, into r4, r5, r6, r7, r8 and sp by the ldmia in __mmap_switched.
        .type    __switch_data, %object
__switch_data:    .long    __mmap_switched
        .long    SYMBOL_NAME(__bss_start)                  @ -> r4: start of BSS
        .long    SYMBOL_NAME(_end)                         @ -> r5: end of BSS
        .long    SYMBOL_NAME(processor_id)                 @ -> r6: where the processor ID is stored
        .long    SYMBOL_NAME(__machine_arch_type)          @ -> r7: where the machine type is stored
        .long    SYMBOL_NAME(cr_alignment)                 @ -> r8: where the control register values are stored
        .long    SYMBOL_NAME(init_task_union)+8192         @ -> sp: top of the 8192-byte init_task_union area
...
        @ __mmap_switched: loads the values listed in __switch_data, clears BSS,
        @ records the processor ID and machine type, saves the control register
        @ values and branches to start_kernel with sp at init_task_union+8192.
        .align    5
__mmap_switched:
        adr    r3, __switch_data + 4         @ skip the first word (address of __mmap_switched)
        @ r4 = __bss_start, r5 = _end, r6 = &processor_id,
        @ r7 = &__machine_arch_type, r8 = &cr_alignment, sp = init_task_union+8192.
        @ NOTE(review): the "r2 = compat" remark below does not match this
        @ register list (r2 is not loaded here).
        ldmia    r3, {r4, r5, r6, r7, r8, sp}@ r2 = compat
                                             @ sp = stack pointer
        mov    fp, #0                        @ Clear BSS (and zero fp)
1:      cmp    r4, r5
        strcc    fp, [r4],#4
        bcc    1b
        str    r9, [r6]                      @ Save processor ID
        str    r1, [r7]                      @ Save machine type
#ifdef CONFIG_ALIGNMENT_TRAP
        orr    r0, r0, #2                    @ ...........A.
#endif
        bic    r2, r0, #2                    @ Clear 'A' bit
        stmia    r8, {r0, r2}                @ Save control register values
        b    SYMBOL_NAME(start_kernel)
...
-----------------------------------------------------------------------
2.反汇编__switch_data数据
c0008038 <__switch_data>:
c0008038:    c0008080     andgt    r8, r0, r0, lsl #1
c000803c:    c0120660     andgts    r0, r2, r0, ror #12
c0008040:    c01415c8     andgts    r1, r4, r8, asr #11
c0008044:    c01213b8     ldrgth    r1, [r2], -r8
c0008048:    c01213ac     andgts    r1, r2, ip, lsr #7
c000804c:    c0118c4c     andgts    r8, r1, ip, asr #24
c0008050:    c0118000     andgts    r8, r1, r0                    

//sp=c0118000,内核空间,启动时,暂时使用,后由init进程使用[gliethttp]
//arm入栈方式(满递减栈):sp先减4,之后把数据写入新sp所指向的地址
-----------------------------------------------------------------------
3.arch/arm/kernel/Init_task.c
/*
 * Statically defined task_union for init_task (the idle task, pid 0),
 * placed in the ".init.task" section and initialised by INIT_TASK().
 */
union task_union init_task_union __attribute__((__section__(".init.task"))) =
        { INIT_TASK(init_task_union.task) };
-----------------------
/* Default size of the task area: 2048 longs (8192 bytes when sizeof(long) is 4). */
#ifndef INIT_TASK_SIZE
# define INIT_TASK_SIZE    2048*sizeof(long)
#endif
/*
 * Overlays a task_struct and a stack array in one fixed-size region of
 * INIT_TASK_SIZE bytes.
 */
union task_union {                                         // simply tells the compiler to reserve a fixed 8 KB region for kernel use
    struct task_struct task;                               // task_struct sits at the low addresses, the stack at the high addresses
    unsigned long stack[INIT_TASK_SIZE/sizeof(long)];
};
-----------------------
在arch/arm/vmlinux-armv.lds.in
...
. = ALIGN(8192);                                           //align to 8 KB
    .data : {
        
/*
         * first, the init task union, aligned
         * to an 8192 byte boundary.
         */

        *(.init.task)                                      //8 KB aligned [gliethttp 2007-07-18]
        
/*
         * then the cacheline aligned data
         */

        . = ALIGN(32);
        *(.data.cacheline_aligned)
        
/*
         * and the usual data section
         */

        *(.data)
        CONSTRUCTORS
        _edata = .;
    }
...
-----------------------------------------------------------------------
4.反汇编init_task_union,以下数值是由INIT_TASK(init_task_union.task)静态编译生成
c0116000 <init_task_union>:
    ...
c0116010:    c0118ee0     andgts    r8, r1, r0, ror #29
    ...
c011601c:    ffffffff     swinv    0x00ffffff
c0116020:    0000000a     andeq    r0, r0, sl
    ...
c0116034:    ffffffff     swinv    0x00ffffff
c0116038:    ffffffff     swinv    0x00ffffff
c011603c:    c011603c     andgts    r6, r1, ip, lsr r0
c0116040:    c011603c     andgts    r6, r1, ip, lsr r0
c0116044:    00000000     andeq    r0, r0, r0
c0116048:    c0116000     andgts    r6, r1, r0
c011604c:    c0116000     andgts    r6, r1, r0
c0116050:    c01189ac     andgts    r8, r1, ip, lsr #19
    ...
c0116094:    c0116000     andgts    r6, r1, r0
c0116098:    c0116000     andgts    r6, r1, r0
    ...
c01160a8:    c01160a8     andgts    r6, r1, r8, lsr #1
c01160ac:    c01160a8     andgts    r6, r1, r8, lsr #1
    ...
c01160bc:    c01160bc     ldrgth    r6, [r1], -ip
c01160c0:    c01160bc     ldrgth    r6, [r1], -ip
    ...
c01160f4:    c0025b1c     andgt    r5, r2, ip, lsl fp
    ...
c01161d4:    fffffeff     swinv    0x00fffeff
c01161d8:    00000000     andeq    r0, r0, r0
c01161dc:    ffffffff     swinv    0x00ffffff
c01161e0:    00000000     andeq    r0, r0, r0
c01161e4:    c0119d08     andgts    r9, r1, r8, lsl #26
c01161e8:    ffffffff     swinv    0x00ffffff
c01161ec:    ffffffff     swinv    0x00ffffff
c01161f0:    ffffffff     swinv    0x00ffffff
c01161f4:    ffffffff     swinv    0x00ffffff
c01161f8:    ffffffff     swinv    0x00ffffff
c01161fc:    ffffffff     swinv    0x00ffffff
c0116200:    00800000     addeq    r0, r0, r0
c0116204:    ffffffff     swinv    0x00ffffff
c0116208:    00000000     andeq    r0, r0, r0
c011620c:    ffffffff     swinv    0x00ffffff
c0116210:    ffffffff     swinv    0x00ffffff
c0116214:    ffffffff     swinv    0x00ffffff
    ...
c0116220:    00000400     andeq    r0, r0, r0, lsl #8
c0116224:    00000400     andeq    r0, r0, r0, lsl #8
c0116228:    ffffffff     swinv    0x00ffffff
c011622c:    ffffffff     swinv    0x00ffffff
c0116230:    ffffffff     swinv    0x00ffffff
c0116234:    ffffffff     swinv    0x00ffffff
c0116238:    ffffffff     swinv    0x00ffffff
c011623c:    ffffffff     swinv    0x00ffffff
c0116240:    77730000     ldrvcb    r0, [r3, -r0]!
c0116244:    65707061     ldrvsb    r7, [r0, -#97]!
c0116248:    00000072     andeq    r0, r0, r2, ror r0
    ...
c011626c:    00000001     andeq    r0, r0, r1
    ...
c0116320:    0000001d     andeq    r0, r0, sp, lsl r0
c0116324:    c01182e0     andgts    r8, r1, r0, ror #5
c0116328:    c0118304     andgts    r8, r1, r4, lsl #6
    ...
c0116334:    c01184a4     andgts    r8, r1, r4, lsr #9
    ...
c0116344:    c0116340     andgts    r6, r1, r0, asr #6
    ...
c0118000 <runqueue_lock>:
-----------------------------------------------------------------------
综上可知start_kernel函数中使用的sp值[栈顶]为内核空间的c0118000[我的at91rm9200板sdram起始地址为0x20000000,head-armv.S前4M虚拟内存映射之后,物理内存20118000对应内核虚拟内存c0118000],
看看init线程的创建
init/Main.c->rest_init()->kernel/Fork.c->kernel_thread()->arch/arm/kernel/Process.c->arch_kernel_thread()->arch/arm/kernel/Sys_arm.c->sys_clone();
/*
 * Spawns the "init" kernel thread via kernel_thread(), releases the kernel
 * lock, requests a reschedule and then enters cpu_idle() in the calling
 * context.
 */
static void rest_init(void)
{
    kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);// create the kernel thread "init"
    unlock_kernel();
    current->need_resched = 1;
     cpu_idle();
}
/*
 * Creates a kernel thread that runs fn(arg), by delegating to
 * arch_kernel_thread().
 *
 * fn:    entry function of the new thread
 * arg:   argument passed to fn
 * flags: clone flags forwarded to arch_kernel_thread()
 *
 * Returns -EPERM if the calling task is being ptraced; otherwise returns
 * whatever arch_kernel_thread() returns.
 */
long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
{
    struct task_struct *task = current;
    unsigned old_task_dumpable;
    long ret;
    /* lock out any potential ptracer */
    task_lock(task);
    if (task->ptrace) {
        task_unlock(task);
        return -EPERM;
    }
    /* Clear task_dumpable for the duration of the call; it is restored below. */
    old_task_dumpable = task->task_dumpable;
    task->task_dumpable = 0;
    task_unlock(task);
    ret = arch_kernel_thread(fn, arg, flags);                

// Per the article author: the only call site of arch_kernel_thread, similar in nature to OSStart() in uC/OS-II.
    /* never reached in child process, only in parent */
    current->task_dumpable = old_task_dumpable;
    return ret;
}
/*
 * Creates a kernel thread by issuing the clone system call (with CLONE_VM
 * OR-ed into flags and a new stack pointer argument of 0) from inline
 * assembly.
 *
 * fn:    function the child runs
 * arg:   argument handed to fn in r0
 * flags: clone flags supplied by the caller
 *
 * In the parent the system call result is non-zero, so execution skips to
 * label 1 and that result is returned. In the child the result is zero: fp
 * is cleared, arg is moved into r0, lr is set up as the return address and
 * fn is entered; when fn returns the child branches to sys_exit.
 */
pid_t arch_kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
    pid_t __ret;
    /*
     * Argument registers when called for the init thread (article author's
     * notes, 2007-07-18 gliethttp):
     *   r0 = fn    = init
     *   r1 = arg   = NULL
     *   r2 = flags = 0x00010e00 = CLONE_FS | CLONE_FILES | CLONE_SIGNAL
     */
    __asm__ __volatile__(
    
"orr    r0, %1, %2    @ kernel_thread sys_clone    \n\
    mov    r1, #0                                      \n\
    "
/*
 * Triggers the swi software interrupt that executes the sys_clone system
 * call. This note must stay outside the string literal: text placed after
 * the trailing backslash inside the literal breaks the line continuation.
 */
__syscall(clone)
"                                 \n\
    movs    %0, r0        @ if we are the child        \n\
    bne    1f                                          \n\
    mov    fp, #0         @ ensure that fp is zero     \n\
    mov    r0, %4                                      \n\
    mov    lr, pc                                      \n\
    mov    pc, %3                                      \n\
    b    sys_exit                                      \n\
1:    "

        : "=&r" (__ret)
        : "Ir" (flags), "I" (CLONE_VM), "r" (fn), "r" (arg)
    : "r0", "r1", "lr");
    return __ret;
}
-----------------------
arch/arm/kernel/entry-common.S
@ Wrapper that supplies the third argument of sys_clone: r2 = sp + S_OFF,
@ which points at the start of the pt_regs frame saved on the stack
@ (S_OFF = 8 per the article author, gliethttp).
@ When the init thread is being created, this sp lies inside init_task_union.
sys_clone_wapper:
        add    r2, sp, #S_OFF
        b    SYMBOL_NAME(sys_clone)
-----------------------
arch/arm/kernel/entry-common.S
...
@ SWI (system call) entry: saves the caller's registers as a pt_regs frame,
@ extracts the system call number and dispatches through sys_call_table.
.align    5
ENTRY(vector_swi)
    save_user_regs
    zero_fp
    get_scno

// Per the article author: "ldr r7, [lr, #-4]" loads the swi instruction itself (e.g. swi 0x0090000b, machine code 0xef90000b) into r7, so r7 = 0xef90000b here [gliethttp]
    arm710_bug_check scno, ip
#ifdef CONFIG_ALIGNMENT_TRAP
    ldr    ip, __cr_alignment
    ldr    ip, [ip]
    mcr    p15, 0, ip, c1, c0        @ update control register
#endif
    enable_irq ip
    str    r4, [sp, #-S_OFF]!

// Stores r4 at [sp-8] and sets sp = sp-8 [2007-07-19 gliethttp]; this leaves an 8-byte gap below the saved frame
    get_current_task tsk
    ldr    ip, [tsk, #TSK_PTRACE]         @ check for syscall tracing
    bic    scno, scno, #0xff000000        @ mask off SWI op-code
    eor    scno, scno, #OS_NUMBER << 20   @ check OS number
    adr    tbl, sys_call_table            @ load syscall table pointer
    tst    ip, #PT_TRACESYS               @ are we tracing syscalls?
    bne    __sys_trace

    adrsvc    al, lr, ret_fast_syscall    @ return address
    cmp    scno, #NR_syscalls             @ check upper syscall limit
    ldrcc    pc, [tbl, scno, lsl #2]

// Jumps to the swi handler without pushing anything further onto sp [gliethttp 2007-07-18]
-----------------------
arch/arm/kernel/entry-header.S
    @ save_user_regs: reserves S_FRAME_SIZE bytes on the stack and stores the
    @ caller's registers, return address, spsr and original r0 into that frame.
    .macro    save_user_regs
    sub    sp, sp, #S_FRAME_SIZE        

// sizeof(struct pt_regs) = 18*4 = 72 = S_FRAME_SIZE
    stmia    sp, {r0 - r12}           

// stores r0..r12 in order at byte offsets sp+0, sp+4, ..., sp+48; sp itself is unchanged
    add    r8, sp, #S_PC

// r8 = sp + S_PC = sp + 60
    stmdb    r8, {sp, lr}^                                 // stores lr at r8-4 and sp at r8-8
    mrs    r8, spsr                                        // keep spsr in r8 temporarily
    str    lr, [sp, #S_PC]                                 // store lr at sp + S_PC
    str    r8, [sp, #S_PSR]                                // store spsr at sp + S_PSR
    str    r0, [sp, #S_OLD_R0]                             // store r0 at sp + S_OLD_R0
    .endm
-----------------------
/*
 * clone system call: forwards to do_fork(), using the caller's saved stack
 * pointer when no new stack pointer is supplied.
 *
 * clone_flags: clone flags passed through to do_fork()
 * newsp:       stack pointer for the child, or 0 to reuse regs->ARM_sp
 * regs:        saved register frame of the caller
 *
 * Returns the result of do_fork().
 */
asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, struct pt_regs *regs)
{
// Values when the init thread is created (article author's notes):
// r0 = clone_flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGNAL
// r1 = newsp = 0
// r2 = pointer to the start of the pt_regs frame saved on the sp stack, per the analysis above [2007-07-18 gliethttp]
    if (!newsp)// when creating the init thread, both the sp stack and regs lie inside init_task_union
        newsp = regs->ARM_sp;                              

// i.e. regs->uregs[13] (2007-07-18 gliethttp)
    return do_fork(clone_flags, newsp, regs, 0);
}
-----------------------------------------------------------------------
include/asm-arm/proc-armv/Ptrace.h定义了regs结构
/* Saved register frame: 18 longs, accessed through the ARM_* names below. */
struct pt_regs {
    long uregs[18];
};

/* Named accessors for the slots of pt_regs.uregs[]. */
#define ARM_cpsr    uregs[16]
#define ARM_pc        uregs[15]
#define ARM_lr        uregs[14]
#define ARM_sp        uregs[13]
#define ARM_ip        uregs[12]
#define ARM_fp        uregs[11]
#define ARM_r10        uregs[10]
#define ARM_r9        uregs[9]
#define ARM_r8        uregs[8]
#define ARM_r7        uregs[7]
#define ARM_r6        uregs[6]
#define ARM_r5        uregs[5]
#define ARM_r4        uregs[4]
#define ARM_r3        uregs[3]
#define ARM_r2        uregs[2]
#define ARM_r1        uregs[1]
#define ARM_r0        uregs[0]
#define ARM_ORIG_r0    uregs[17]
-----------------------------------------------------------------------
rest_init()->kernel_thread()->arch_kernel_thread()->sys_clone()->do_fork()->copy_thread()
/*
 * Prepares the kernel stack of a newly forked task p: copies the parent's
 * register frame to the top of p's 8192-byte task area, then places a
 * context_save_struct directly below it.
 *
 * esp:  value stored as the child's saved sp
 * p:    the new task
 * regs: the parent's saved register frame
 * (nr, clone_flags and unused are not used in this function.)
 *
 * Always returns 0.
 */
int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
     unsigned long unused, struct task_struct * p, struct pt_regs * regs)
{
    struct pt_regs *childregs;
    struct context_save_struct * save;
    atomic_set(&p->thread.refcount, 1);
    childregs = ((struct pt_regs *)((unsigned long)p + 8192 - 8)) - 1;

// i.e. (char *)p + 8192 - 8 - sizeof(struct pt_regs), where sizeof(struct pt_regs) = 18*4 [gliethttp]
    *childregs = *regs;                                        

    childregs->ARM_r0 = 0;                                 // r0 is set to 0 in the child's frame
    childregs->ARM_sp = esp;
    
    save = ((struct context_save_struct *)(childregs)) - 1;
    *save = INIT_CSS;                                      // default cpsr, r4, r5, r6, r7, r8, r9, sl, fp, pc for the new thread

    save->pc |= (unsigned long)ret_from_fork;              // ORs in the address of ret_from_fork as the saved pc
    p->thread.save = save;                                 // used by __switch_to when switching tasks

    return 0;
}
-----------------------------------------------------------------------
系统调用swi返回
arch/arm/kernel/entry-common.S
...
    .align    5
/*
 * This is the fast syscall return path. We do as little as
 * possible here, and this includes saving r0 back into the SVC
 * stack.
 */

ret_fast_syscall:
    disable_irq r1                @ ensure IRQs are disabled
    ldr    r1, [tsk, #TSK_NEED_RESCHED]
    ldr    r2, [tsk, #TSK_SIGPENDING]
    teq    r1, #0                 @ need_resched || sigpending
    teqeq    r2, #0
    bne    slow                   @ either flag non-zero: branch to "slow"
    fast_restore_user_regs        @ otherwise restore the saved registers and return
...
-----------------------
arch/arm/kernel/entry-header.S
/*
 * Must be called with IRQs already disabled.
 */

    .macro    fast_restore_user_regs
    ldr    r1, [sp, #S_OFF + S_PSR]    @ get calling cpsr
    ldr    lr, [sp, #S_OFF + S_PC]!    @ get pc
    msr    spsr, r1                    @ save in spsr_svc
    ldmdb    sp, {r1 - lr}^                                // restore the registers
    mov    r0, r0
    add    sp, sp, #S_FRAME_SIZE - S_PC
    movs    pc, lr                                         // return to the point of the system call and continue
    .endm
能力所限,就到这里了,后边的继续研究.另外用户栈的创建,粗略的过一下,免得忘了:
-----------------------------------------------------------------------
########
:
将参数栈加入到进程的虚拟地址空间。
进程的用户栈的栈底在0xC0000000(3G)处,用户栈从此处开始向下增长。进程的参数栈在它的参数数据结构linux_binprm的page表中,而且参数所占用的物理页已经分配。因为目前要放入用户栈中的只有在参数栈中的进程运行所需要的参数,而且用户栈的大小也无法预先确定,所以,此处先根据参数栈的大小建立用户栈。如果进程在以后的运行中需要更大的栈空间(PUSH操作时,栈指针esp越界),处理器会产生page fault异常,系统中的page fault异常处理程序do_page_fault会向下扩展用户栈空间。
根据参数栈的大小建立一个内存区域数据结构vm_area_struct,将已分配的参数页逐个加入到进程的页目录和页表中。
◆申请一块内存用来建立vm_area_struct数据结构,该内存区域的地址范围是[3G - 参数栈大小,3G],其标志为VM_STACK_FLAGS(向下增长,可读、写、执行、共享等),该区域没有规定操作,没有对应的文件;
◆因为当前页目录中用户页表部分全为空,所以根据用户参数栈各页的虚拟地址首先分配相应的页表,而后将各参数栈页对应的页表项设为脏页,填入相应的页表中;
◆设置当前进程用户栈的开始位置为当前的栈顶:
 mm->start_stack。
########
load_elf_binary()->setup_arg_pages(bprm)
/*
 * Maps the argument pages held in bprm->page[] into the current process
 * just below STACK_TOP and creates the stack VMA that covers them.
 *
 * bprm: exec parameters; p, loader and exec are rebased by stack_base, and
 *       each non-NULL page[] entry is handed to put_dirty_page() and cleared.
 *
 * Returns 0 on success, -ENOMEM if the vm_area_struct cannot be allocated.
 */
int setup_arg_pages(struct linux_binprm *bprm)
{
    unsigned long stack_base;
    struct vm_area_struct *mpnt;
    int i;
/*STACK_TOP = 0xC0000000 = 3G
#ifdef __KERNEL__
#define STACK_TOP    ((current->personality == PER_LINUX_32BIT) ? \
             TASK_SIZE : TASK_SIZE_26)
#endif
*/

    /* Lowest address of the MAX_ARG_PAGES-page window that ends at STACK_TOP. */
    stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
    bprm->p += stack_base;
    if (bprm->loader)
        bprm->loader += stack_base;
    bprm->exec += stack_base;
    mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
    if (!mpnt)
        return -ENOMEM; 
    down_write(&current->mm->mmap_sem);
    {
        /* The VMA spans from the page containing bprm->p up to STACK_TOP. */
        mpnt->vm_mm = current->mm;
        mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
        mpnt->vm_end = STACK_TOP;                          // 0xC0000000 = 3G
        mpnt->vm_page_prot = PAGE_COPY;
        mpnt->vm_flags = VM_STACK_FLAGS;                   // mark this VMA with VM_STACK_FLAGS
        mpnt->vm_ops = NULL;
        mpnt->vm_pgoff = 0;
        mpnt->vm_file = NULL;
        mpnt->vm_private_data = (void *) 0;
        insert_vm_struct(current->mm, mpnt);
        current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
    } 
    /* Install every allocated argument page at its address; stack_base advances one page per slot. */
    for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
        struct page *page = bprm->page[i];
        if (page) {
            bprm->page[i] = NULL;
            put_dirty_page(current,page,stack_base);
        }
        stack_base += PAGE_SIZE;
    }
    up_write(&current->mm->mmap_sem);
    return 0;
}
-----------------------------------------------------------------------
load_elf_binary()->setup_arg_pages(bprm)->start_thread(regs, elf_entry, bprm->p)
/*
 * start_thread(regs, pc, sp) - set up the register frame for a new user
 * program.
 *
 * regs: pointer to the pt_regs frame to fill in
 * pc:   entry address stored in regs->ARM_pc
 * sp:   user stack pointer stored in regs->ARM_sp; the three words at sp
 *       are read as argc, argv and envp
 *
 * Switches the address limit to USER_DS, zeroes all of regs->uregs, selects
 * USR_MODE or USR26_MODE for cpsr depending on ADDR_LIMIT_32BIT in
 * current->personality, then fills in pc, sp, r0, r1 and r2.
 *
 * Every line of the body must end with the continuation backslash as its
 * last character, so notes go in block comments placed before it.
 */
#define start_thread(regs,pc,sp)                           \
({                                                         \
    unsigned long *stack = (unsigned long *)sp;            \
    set_fs(USER_DS);                                       \
    memzero(regs->uregs, sizeof(regs->uregs));             \
    if (current->personality & ADDR_LIMIT_32BIT)           \
        regs->ARM_cpsr = USR_MODE;                         \
    else                                                   \
        regs->ARM_cpsr = USR26_MODE;                       \
    regs->ARM_pc = pc;        /* pc */                     \
    /* the values in regs take effect when the system call returns */ \
    regs->ARM_sp = sp;        /* sp */                     \
    regs->ARM_r2 = stack[2];    /* r2 (envp) */            \
    regs->ARM_r1 = stack[1];    /* r1 (argv) */            \
    regs->ARM_r0 = stack[0];    /* r0 (argc) */            \
})

完.

阅读(444) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~