Chinaunix首页 | 论坛 | 博客
  • 博客访问: 517676
  • 博文数量: 95
  • 博客积分: 5168
  • 博客等级: 大校
  • 技术积分: 1271
  • 用 户 组: 普通用户
  • 注册时间: 2008-12-28 23:31
文章分类

全部博文(95)

文章存档

2013年(2)

2012年(3)

2011年(1)

2010年(8)

2009年(81)

分类:

2009-11-27 19:48:41

-------------------------------------------
本文系本站原创,欢迎转载!
转载请注明出处:
http://sjj0412.cublog.cn
-------------------------------------------

/*
 * startup_32: 32-bit entry point of the compressed-kernel boot stub.
 *
 * Loads KERNEL_DS into DS/ES/FS/GS.  In SMP builds BX then selects the
 * path: BX == 0 is the boot CPU, which falls through to label 2; BX != 0
 * is a secondary CPU, which builds a stack pointer from CX, clears EFLAGS
 * and jumps straight to 0x100000.
 *
 * The boot CPU loads its stack from stack_start, loops until A20 is seen
 * to be enabled, clears EFLAGS, zeroes the region _edata.._end, calls
 * decompress_kernel and, if that returns 0, jumps to 0x100000 with EBX = 0.
 */
startup_32:
    cld
    cli
    movl $(KERNEL_DS),%eax
    mov %ax,%ds
    mov %ax,%es
    mov %ax,%fs
    mov %ax,%gs
#ifdef __SMP__
    orw %bx,%bx         # What state are we in: BX=1 for SMP, 0 for boot
    jz  2f              # Initial boot
/*
 * BX tells the boot CPU (BX=0) apart from a secondary CPU (BX=1);
 * the two take different execution paths from here on.
 */
/*
 *  We are trampolining an SMP processor
 *  (this is the path taken by the secondary CPUs).
 */
    mov %ax,%ss
    xorl %eax,%eax          # Back to 0
    mov  %cx,%ax        # SP low 16 bits
    movl %eax,%esp
    pushl $0            # Clear NT: push an immediate 0 (a bare "0" would
                        # push the word stored at address 0 instead)
    popfl
    ljmp $(KERNEL_CS), $0x100000    # Into C and sanity

2:  /* the boot CPU continues here */
#endif
    lss SYMBOL_NAME(stack_start),%esp
    xorl %eax,%eax
1:  incl %eax       # check that A20 really IS enabled
    movl %eax,0x000000  # loop forever if it isn't
    cmpl %eax,0x100000
    je 1b
/*
 * Initialize eflags.  Some BIOS's leave bits like NT set.  This would
 * confuse the debugger if this code is traced.
 * XXX - best to initialize before switching to protected mode.
 */
    pushl $0
    popfl
/*
 * Clear BSS
 */
    xorl %eax,%eax
    movl $ SYMBOL_NAME(_edata),%edi
    movl $ SYMBOL_NAME(_end),%ecx
    subl %edi,%ecx
    cld
    rep
    stosb
/*
 * Do the decompression, and jump to the new kernel..
 */
    subl $16,%esp   # place for structure on the stack
    pushl %esp  # address of structure as first arg
    call SYMBOL_NAME(decompress_kernel)
    orl  %eax,%eax
    jnz  3f         # non-zero return: handled at label 3 (not part of this excerpt)
    xorl %ebx,%ebx  # EBX = 0; the SMP entry test above treats BX=0 as the boot CPU
    ljmp $(KERNEL_CS), $0x100000

 

 

 

 

ljmp $(KERNEL_CS), $0x100000

这条指令跳转到0x100000,也就是解压后内核映像的入口;从那里继续执行,最终会进入start_kernel函数。

 

/*
 * start_kernel - C-level kernel initialisation.
 *
 * In SMP builds every processor enters this function.  Only the first
 * caller runs the initialisation sequence below; later callers find
 * first_cpu already cleared and are diverted to start_secondary(), which
 * ends in cpu_idle() and therefore does not come back here.
 *
 * The first caller sets up the architecture, paging, traps, IRQs, the
 * scheduler and the remaining subsystems in the order written, threading
 * memory_start through each *_init() that returns an updated value.  It
 * then starts the other processors (SMP only), spawns the init thread and
 * finally enters cpu_idle() itself.
 *
 * NOTE(review): memory_start and memory_end are not declared in this
 * function; presumably they are file-scope variables - confirm.
 */
asmlinkage void start_kernel(void)
{
    char * command_line;

/*
 *  This little check will move.
 */

#ifdef __SMP__
    static int first_cpu=1;
    /*
     * first_cpu is a function-scope static, not an automatic variable.
     * It starts at 1 and is cleared below by the first CPU to arrive, so
     * every CPU that enters later sees 0 and takes the start_secondary()
     * branch instead of repeating the initialisation.
     */
    if(!first_cpu)
        start_secondary();
    first_cpu=0;

#endif 
/*
 * Interrupts are still disabled. Do necessary setups, then
 * enable them
 */
    setup_arch(&command_line, &memory_start, &memory_end);
    memory_start = paging_init(memory_start,memory_end);
    trap_init();
    init_IRQ();
    sched_init();
    time_init();
    parse_options(command_line);
#ifdef CONFIG_MODULES
    init_modules();
#endif
    /*
     * With CONFIG_PROFILE, a prof_shift of 0 is replaced by
     * CONFIG_PROFILE_SHIFT when that is defined, otherwise by 2.
     */
#ifdef CONFIG_PROFILE
    if (!prof_shift)
#ifdef CONFIG_PROFILE_SHIFT
        prof_shift = CONFIG_PROFILE_SHIFT;
#else
        prof_shift = 2;
#endif
#endif
    /* Carve the profiling buffer out of the memory at memory_start. */
    if (prof_shift) {
        prof_buffer = (unsigned int *) memory_start;
        /* only text is profiled */
        prof_len = (unsigned long) &_etext - (unsigned long) &_stext;
        prof_len >>= prof_shift;
        memory_start += prof_len * sizeof(unsigned int);
    }
    memory_start = console_init(memory_start,memory_end);
#ifdef CONFIG_PCI
    memory_start = pci_init(memory_start,memory_end);
#endif
    memory_start = kmalloc_init(memory_start,memory_end);
    /* First point at which interrupts are enabled. */
    sti();
    calibrate_delay();
    memory_start = inode_init(memory_start,memory_end);
    memory_start = file_table_init(memory_start,memory_end);
    memory_start = name_cache_init(memory_start,memory_end);
#ifdef CONFIG_BLK_DEV_INITRD
    /*
     * If the allocations above have advanced memory_start past
     * initrd_start, the initrd is reported as overwritten and disabled.
     */
    if (initrd_start && initrd_start < memory_start) {
        printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
            "disabling it.\n",initrd_start,memory_start);
        initrd_start = 0;
    }
#endif
    mem_init(memory_start,memory_end);
    buffer_init();
    sock_init();
#if defined(CONFIG_SYSVIPC) || defined(CONFIG_KERNELD)
    ipc_init();
#endif
    dquot_init();
    arch_syms_export();
    sti();
    check_bugs();

    printk(linux_banner);
#ifdef __SMP__
    /* Boot the other processors and create their idle tasks. */
    smp_init();
#endif
    sysctl_init();
    /*
     *  We count on the initial thread going ok
     *  Like idlers init is an unlocked kernel thread, which will
     *  make syscalls (and thus be locked).
     */
    kernel_thread(init, NULL, 0);
/*
 * task[0] is meant to be used as an "idle" task: it may not sleep, but
 * it might do some general things like count free pages or it could be
 * used to implement a reasonable LRU algorithm for the paging routines:
 * anything that can be useful, but shouldn't take time from the real
 * processes.
 *
 * Right now task[0] just does a infinite idle loop.
 */
    cpu_idle(NULL);
}

 

 

 

/*
 * start_secondary - initialisation path for a non-boot processor.
 *
 * Reached from start_kernel() once first_cpu has been cleared.  Installs
 * trap and IRQ handling, reports in through smp_callin() and then drops
 * into the idle loop.  cpu_idle() never returns, so neither does this
 * function.
 */
asmlinkage void start_secondary(void)
{
    trap_init();
    /* Initialise this CPU's own IRQ handling. */
    init_IRQ();
    /* Does not return until the boot CPU has given the start signal. */
    smp_callin();
    /* From here on this CPU runs the idle loop. */
    cpu_idle(NULL);
}

/*
 * smp_callin - bring-up handshake executed by a secondary processor.
 *
 * Sets bit 8 in the APIC_SPIV register, enables interrupts, runs
 * calibrate_delay(), stores this CPU's parameters and sets its bit in
 * cpu_callin_map so the boot CPU can proceed.  It then busy-waits until
 * smp_commenced becomes non-zero.  A CPU whose cpu_number_map entry is -1
 * spins forever at that point; any other CPU loads its task register and
 * returns.
 */
void smp_callin(void)
{
    extern void calibrate_delay(void);
    /* APIC ID of the processor executing this code. */
    int cpuid=GET_APIC_ID(apic_read(APIC_ID));
    unsigned long l;

    /*
     *  Activate our APIC
     */

    SMP_PRINTK(("CALLIN %d\n",smp_processor_id()));
    l=apic_read(APIC_SPIV);
    l|=(1<<8);      /* Enable */
    apic_write(APIC_SPIV,l);
    sti();
    /*
     *  Get our bogomips.
     */
    calibrate_delay();
    /*
     *  Save our processor parameters
     */
    smp_store_cpu_info(cpuid);
    /*
     *  Allow the master to continue.
     */
    set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]);
    /*
     *  Until we are ready for SMP scheduling
     */
    load_ldt(0);
/*  printk("Testing faulting...\n");
    *(long *)0=1;       OOPS... */
    local_flush_tlb();
    /*
     * Spin until the boot CPU sets smp_commenced.  This behaves like a
     * spin-wait on a "go" flag.
     * NOTE(review): correctness depends on smp_commenced being re-read on
     * every iteration (e.g. declared volatile) - confirm at its definition.
     */
    while(!smp_commenced);
    /* A CPU with no logical number assigned is parked here for good. */
    if (cpu_number_map[cpuid] == -1)
        while(1);
    local_flush_tlb();
    SMP_PRINTK(("Commenced..\n"));

    load_TR(cpu_number_map[cpuid]);
/*  while(1);*/
}

/*
 * cpu_idle - body of an idle task.
 *
 * Calls idle() over and over and never returns; the argument is ignored.
 * The int return type and void * parameter let it be passed to
 * kernel_thread() as a thread function.
 */
int cpu_idle(void *unused)
{
    while (1) {
        idle();
    }
}

主cpu给各次cpu发开始信号是在init函数中调用smp_begin函数:

/*
 * smp_begin - let the secondary processors start running.
 *
 * Marks the SMP threads as ready and then calls smp_commence().
 *
 * NOTE(review): the surrounding article says smp_commence() notifies the
 * secondaries through an IPI.  smp_callin() merely polls the
 * smp_commenced flag, so the signal may simply be that flag being set -
 * confirm against the definition of smp_commence().
 */
static void smp_begin(void)
{
    smp_threads_ready=1;
    smp_commence();
}

每个cpu有一个current指针。

刚开始的时候,所有cpu的current都被赋值为&init_task;

这个赋值是主cpu在调用sched_init时完成的。

/*
 * sched_init - scheduler set-up, called from start_kernel().
 *
 * Points the per-CPU "current" slots at init_task and registers the
 * timer, task-queue and immediate bottom-half handlers.  Without __SMP__
 * only the calling CPU's slot is written; with __SMP__ init_task is
 * tagged with the calling CPU and all NR_CPUS slots are written.
 */
void sched_init(void)
{
    /*
     *  We have to do a little magic to get the first
     *  process right in SMP mode.
     */
    /* Expected to be 0 here: only the boot CPU calls this function. */
    int cpu=smp_processor_id();
#ifndef __SMP__
    current_set[cpu]=&init_task;
#else
    /* Record that init_task is running on the boot CPU. */
    init_task.processor=cpu;
    /* Every CPU starts with init_task as its current task. */
    for(cpu = 0; cpu < NR_CPUS; cpu++)
        current_set[cpu] = &init_task;
#endif
    init_bh(TIMER_BH, timer_bh);
    init_bh(TQUEUE_BH, tqueue_bh);
    init_bh(IMMEDIATE_BH, immediate_bh);
}

同时,current_set中的这些项还会在smp_init中被进一步设置。

static void smp_init(void)

{

    int i, j;

    smp_boot_cpus();

   

    /*

     *  Create the slave init tasks as sharing pid 0.

     *

     *  This should only happen if we have virtual CPU numbers

     *  higher than 0.

     */

 

    for (i=1; i

    {

        struct task_struct *n, *p;

 

        j = cpu_logical_map[i];

        /*

         *  We use kernel_thread for the idlers which are

         *  unlocked tasks running in kernel space.

         */

        kernel_thread(cpu_idle, NULL, CLONE_PID);

//这个其实就是创建线程然后这个线程体现在task[i]上了,因为创建的时候的task_struct就是从taski]取的。

        /*

         *  Don't assume linear processor numbering

         */

        current_set[j]=task[i];

        current_set[j]->processor=j;

        cli();

        n = task[i]->next_run;

        p = task[i]->prev_run;

        nr_running--;

        n->prev_run = p;

        p->next_run = n;

        task[i]->next_run = task[i]->prev_run = task[i];

        sti();

    }

}

上面执行完后,就给每个次cpu都加了一个idle任务。

 

然后由kernel_thread(init, NULL, 0)创建init任务。

 

//每个cpu在时间中断时都可能调用这个共同的函数。

asmlinkage void schedule(void)

{

    int c;

    struct task_struct * p;

    struct task_struct * prev, * next;

    unsigned long timeout = 0;

    int this_cpu=smp_processor_id();

//获取cpu_id;

/* check alarm, wake up any interruptible tasks that have got a signal */

 

    if (intr_count)

        goto scheduling_in_interrupt;

 

    if (bh_active & bh_mask) {

        intr_count = 1;

        do_bottom_half();

        intr_count = 0;

    }

 

    run_task_queue(&tq_scheduler);

 

    need_resched = 0;

    prev = current;

    cli();

    /* move an exhausted RR process to be last.. */

    if (!prev->counter && prev->policy == SCHED_RR) {

        prev->counter = prev->priority;

        move_last_runqueue(prev);

    }

    switch (prev->state) {

        case TASK_INTERRUPTIBLE:

            if (prev->signal & ~prev->blocked)

                goto makerunnable;

            timeout = prev->timeout;

            if (timeout && (timeout <= jiffies)) {

                prev->timeout = 0;

                timeout = 0;

        makerunnable:

                prev->state = TASK_RUNNING;

                break;

            }

        default:

            del_from_runqueue(prev);

        case TASK_RUNNING:

    }

    p = init_task.next_run;

//获取进程双向链表的一个节点。

    sti();

   

#ifdef __SMP__

    /*

     *  This is safe as we do not permit re-entry of schedule()

     */

    prev->processor = NO_PROC_ID;

#define idle_task (task[cpu_number_map[this_cpu]])

#else

#define idle_task (&init_task)

#endif 

 

/*

 * Note! there may appear new tasks on the run-queue during this, as

 * interrupts are enabled. However, they will be put on front of the

 * list, so our list starting at "p" is essentially fixed.

 */

/* this is the scheduler proper: */

    c = -1000;

    next = idle_task;

    while (p != &init_task) {

//p初始值为init_task.next_run

//当回到init_task时说明已经查找为所有的了。

        int weight = goodness(p, prev, this_cpu);

        if (weight > c)

            c = weight, next = p;

        p = p->next_run;

    }

//这个是查找所有的task,找出最合适的task来调度。

    /* if all runnable processes have "counter == 0", re-calculate counters */

    if (!c) {

        for_each_task(p)

            p->counter = (p->counter >> 1) + p->priority;

    }

#ifdef __SMP__

    /*

     *  Allocate process to CPU

     */

     

     next->processor = this_cpu;

//将这个将要被执行的processor标识为这个cpu

     next->last_processor = this_cpu;

#endif 

#ifdef __SMP_PROF__

    /* mark processor running an idle thread */

    if (0==next->pid)

        set_bit(this_cpu,&smp_idle_map);

    else

        clear_bit(this_cpu,&smp_idle_map);

#endif

    if (prev != next) {

        struct timer_list timer;

 

        kstat.context_swtch++;

        if (timeout) {

            init_timer(&timer);

            timer.expires = timeout;

            timer.data = (unsigned long) prev;

            timer.function = process_timeout;

            add_timer(&timer);

        }

        get_mmu_context(next);

        switch_to(prev,next);

        if (timeout)

            del_timer(&timer);

    }

    return;

 

scheduling_in_interrupt:

    printk("Aiee: scheduling in interrupt %p\n",

        __builtin_return_address(0));

}

上面需要注意的是current变量,在单核中肯定就是一个变量,在多核中肯定是各个cpu有自己的current

其定义如下:

/*
 * current - the task running on the calling CPU, i.e. that CPU's slot in
 * current_set[].  The leading "0+" makes the expansion an rvalue, so
 * "current = ..." does not compile.
 */
#define current (0+current_set[smp_processor_id()])

在smp下,current是current_set数组中的一个元素,指的是某个具体cpu的当前进程。

从上面可以看出一个cpu是从全局task找一个task来运行,每个cpu有一个idle_task,这个task的编号是固定的。

 

所有的task可以通过init_task来找到,因为创建新进程(内核线程)的时候,会将新建的挂到链表上

init_task是静态挂在这上面的。

附上task_struct:

/*
 * struct task_struct - per-task descriptor.
 *
 * One instance exists for every task, including init_task and the
 * per-CPU idle tasks.  The leading fields are marked "hardcoded" by the
 * original authors, so field order must not be changed.
 * NOTE(review): presumably because low-level code addresses them by
 * fixed offset - confirm before touching the layout.
 */
struct task_struct {
/* these are hardcoded - don't touch */
    volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
    long counter;           /* compared with 0 and reloaded from priority in schedule() */
    long priority;
    unsigned long signal;
    unsigned long blocked;  /* bitmap of masked signals */
    unsigned long flags;    /* per process flags, defined below */
    int errno;
    long debugreg[8];  /* Hardware debugging registers */
    struct exec_domain *exec_domain;
/* various fields */
    struct linux_binfmt *binfmt;
    struct task_struct *next_task, *prev_task;
    /* Run-queue links: schedule() walks next_run starting at init_task. */
    struct task_struct *next_run,  *prev_run;
    unsigned long saved_kernel_stack;
    unsigned long kernel_stack_page;
    int exit_code, exit_signal;
    /* ??? */
    unsigned long personality;
    int dumpable:1;
    int did_exec:1;
    /* shouldn't this be pid_t? */
    int pid;
    int pgrp;
    int tty_old_pgrp;
    int session;
    /* boolean value for session group leader */
    int leader;
    int groups[NGROUPS];
    /*
     * pointers to (original) parent process, youngest child, younger sibling,
     * older sibling, respectively.  (p->father can be replaced with
     * p->p_pptr->pid)
     */
    struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
    struct wait_queue *wait_chldexit;   /* for wait4() */
    unsigned short uid,euid,suid,fsuid;
    unsigned short gid,egid,sgid,fsgid;
    /* schedule() compares timeout with jiffies and policy with SCHED_RR. */
    unsigned long timeout, policy, rt_priority;
    unsigned long it_real_value, it_prof_value, it_virt_value;
    unsigned long it_real_incr, it_prof_incr, it_virt_incr;
    struct timer_list real_timer;
    long utime, stime, cutime, cstime, start_time;
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
    unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
    int swappable:1;
    unsigned long swap_address;
    unsigned long old_maj_flt;  /* old value of maj_flt */
    unsigned long dec_flt;      /* page fault count of the last time */
    unsigned long swap_cnt;     /* number of pages to swap on next pass */
/* limits */
    struct rlimit rlim[RLIM_NLIMITS];
    unsigned short used_math;
    char comm[16];
/* file system info */
    int link_count;
    struct tty_struct *tty; /* NULL if no tty */
/* ipc stuff */
    struct sem_undo *semundo;
    struct sem_queue *semsleeping;
/* ldt for this task - used by Wine.  If NULL, default_ldt is used */
    struct desc_struct *ldt;
/* tss for this task */
    struct thread_struct tss;
/* filesystem information */
    struct fs_struct *fs;
/* open file information */
    struct files_struct *files;
/* memory management info */
    struct mm_struct *mm;
/* signal handlers */
    struct signal_struct *sig;
#ifdef __SMP__
    /* schedule() sets both to the CPU chosen to run the task, and
       processor to NO_PROC_ID for the task being switched away from. */
    int processor;
    int last_processor;
    int lock_depth;     /* Lock depth. We can context switch in and out of holding a syscall kernel lock... */ 
#endif 
};

 

故从p = init_task.next_run开始,

沿着next_run指针遍历,p可以依次获取到所有处于就绪状态的task。

阅读(3599) | 评论(0) | 转发(0) |
1

上一篇:多核包围我们

下一篇:一年了啊

给主人留下些什么吧!~~