Chinaunix首页 | 论坛 | 博客
  • 博客访问: 1322315
  • 博文数量: 482
  • 博客积分: 13297
  • 博客等级: 上将
  • 技术积分: 2890
  • 用 户 组: 普通用户
  • 注册时间: 2009-10-12 16:25
文章分类

全部博文(482)

文章存档

2012年(9)

2011年(407)

2010年(66)

分类: LINUX

2011-03-31 17:16:21

在分析 schedule() 函数之前,最好先看看这篇文章,它主要讲的是理论:http://www-128.ibm.com/developerworks/cn/linux/kernel/l-kn26sch/index.html

我主要是从代码的角度简单地分析了一下。

 

 

/*
 * __schedule() is the main scheduler function.
 *
 * (Blog author's note, translated: this is the core scheduling routine; the
 * annotations below study how scheduling is actually carried out.)
 *
 * Outline, as visible in the body:
 *   1. Sanity checks (booting, scheduling while atomic, scheduling from idle).
 *   2. Compute run_time, the amount later subtracted from prev->sleep_avg.
 *   3. Under rq->lock: take prev off the runqueue if it is no longer runnable.
 *   4. Pick next: rq->idle when rq->nr_running is zero, otherwise the first
 *      task on the queue selected by the first set bit of the active array's
 *      bitmap (the active and expired arrays are swapped first when the
 *      active one is empty).
 *   5. switch_tasks: update bookkeeping and, if prev != next, context_switch().
 *
 * Takes no arguments and returns nothing; it operates on `current` and on
 * this CPU's runqueue (this_rq()).
 */

void __sched __schedule(void)
{
    struct task_struct *prev, *next;
    struct prio_array *array;
    struct list_head *queue;
    unsigned long long now;
    unsigned long run_time;
    int cpu, idx, new_prio;
    long *switch_count;
    struct rq *rq;

    WARN_ON(system_state == SYSTEM_BOOTING);

    /*
     * Test if we are atomic. Since do_exit() needs to call into
     * schedule() atomically, we ignore that path for now.
     * Otherwise, whine if we are scheduling when we should not be.
     */
    if (unlikely(in_atomic() && !current->exit_state)) {
        stop_trace();
        printk(KERN_ERR "BUG: scheduling while atomic: "
            "%s/0x%08x/%d, CPU#%d\n",
            current->comm, preempt_count(), current->pid,
            smp_processor_id());
        dump_stack();
    }
    profile_hit(SCHED_PROFILING, __builtin_return_address(0));

    /* Disable preemption. */
    preempt_disable(); 
    // FIXME: disable irqs here

    prev = current;
    release_kernel_lock(prev);
    rq = this_rq();

    /* Handle the idle task. */

    /*
     * The idle thread is not allowed to schedule!
     * Remove this check after it has been exercised a bit.
     */

    /*
     * (Author's question, translated: can this really happen? i.e. the
     * current task is the idle task and its state is not TASK_RUNNING.)
     * Note that unlikely() wraps only the first operand of the &&.
     */
    if (unlikely(prev == rq->idle) && prev->state != TASK_RUNNING) {
        printk(KERN_ERR "BUG: scheduling from the idle thread!\n");
        dump_stack();
    }

    schedstat_inc(rq, sched_cnt); 
    //rq->sched_cnt++

    /* Current time (per the author's note: nanosecond resolution). */
    now = sched_clock(); 

    /*
     * run_time = time elapsed since prev->timestamp, clamped to the range
     * [0, NS_MAX_SLEEP_AVG].
     */
    if (likely((long long)(now - prev->timestamp) < NS_MAX_SLEEP_AVG)) {
        run_time = now - prev->timestamp;
        /*
         * (Author's question, translated: can this happen? how?)
         * NOTE(review): guards against `now` being earlier than
         * prev->timestamp; when that can occur is not visible here.
         */
        if (unlikely((long long)(now - prev->timestamp) < 0)) 
            run_time = 0;
    } else
        run_time = NS_MAX_SLEEP_AVG;

    /*
     * Tasks charged proportionately less run_time at high sleep_avg to
     * delay them losing their interactive status
     */
    /* GNU `?:` shorthand: divide by CURRENT_BONUS(prev), or by 1 if it is 0. */
    run_time /= (CURRENT_BONUS(prev) ? : 1);

    cpu = smp_processor_id();
    spin_lock_irq(&rq->lock);

    switch_count = &prev->nvcsw; 
    // TODO: temporary - to see it in vmstat

    /*
     * prev has a state bit set other than TASK_RUNNING_MUTEX and
     * PREEMPT_ACTIVE is not set in preempt_count().
     * NOTE(review): switch_count already points at prev->nvcsw, so the
     * assignment below is redundant as written (see the TODO above).
     */
    if ((prev->state & ~TASK_RUNNING_MUTEX) &&
            !(preempt_count() & PREEMPT_ACTIVE)) {
        switch_count = &prev->nvcsw;
        /*
         * An interruptible sleeper with a pending signal is set back to
         * TASK_RUNNING and is NOT deactivated, so it stays on the runqueue.
         */
        if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
                unlikely(signal_pending(prev))))
            prev->state = TASK_RUNNING;
        else {
            if (prev->state == TASK_UNINTERRUPTIBLE) {
                rq->nr_uninterruptible++;
                incr_rt_nr_uninterruptible(prev, rq);
            }
            touch_softlockup_watchdog();
            deactivate_task(prev, rq);
        }
    }

    /*
     * If PREEMPT_ACTIVE is set in preempt_count(), drop it via
     * sub_preempt_count(). (Author's note, translated: the flag indicates
     * whether the current task may be preempted; this makes it preemptible
     * again -- not verifiable from this excerpt.)
     */
    if (preempt_count() & PREEMPT_ACTIVE) 
        sub_preempt_count(PREEMPT_ACTIVE); 

    /*
     * A task flagged PF_DEAD is removed from the runqueue. (Author's note,
     * translated: it is already dead, so its state is expected to be
     * TASK_RUNNING; it is then deleted from the run queue.) Its state is
     * set to EXIT_DEAD in both branches.
     */
    if (unlikely(prev->flags & PF_DEAD)) {
        if (prev->state != TASK_RUNNING) {
            printk("prev->state: %ld != TASK_RUNNING??\n",
                prev->state);
            WARN_ON(1);
        } else
            deactivate_task(prev, rq); 

        prev->state = EXIT_DEAD;
    }

#if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
    if (unlikely(atomic_read(&rt_overload)))
        balance_rt_tasks(rq, cpu);
#endif

    /* How we end up scheduling the idle task. */

    /*
     * (Author's note, translated: nr_running is the number of tasks on this
     * runqueue, counting both the active and the expired priority arrays --
     * TODO confirm against the struct rq definition.)
     */
    if (unlikely(!rq->nr_running)) { 
        /* (Author's note, translated: not relevant on uniprocessor builds.) */
        idle_balance(cpu, rq); 

        /* Still nothing runnable after idle_balance(): run the idle task. */
        if (!rq->nr_running) {
            next = rq->idle; 

            rq->expired_timestamp = 0;
            wake_sleeping_dependent(cpu);
            goto switch_tasks;
        }
    }

    /* Swap the active and expired arrays if needed. */
    array = rq->active; 

    /* Marked unlikely(): the active array has no tasks left. */
    if (unlikely(!array->nr_active)) { 
        /*
         * Switch the active and expired arrays.
         */
        schedstat_inc(rq, sched_switch);
        rq->active = rq->expired;
        rq->expired = array;
        /* array now refers to the former expired array, which is now active. */
        array = rq->active; 

        rq->expired_timestamp = 0;
        rq->best_expired_prio = MAX_PRIO;
    }

    /* Choose the most suitable task to run. */

    /*
     * Index of the first set bit in the bitmap (per the author's note: the
     * highest-priority non-empty queue).
     */
    idx = sched_find_first_bit(array->bitmap); 

    queue = array->queue + idx;
    /* Take the first task on that queue. */
    next = list_entry(queue->next, struct task_struct, run_list); 

    /*
     * For a non-RT task whose sleep_type passes interactive_sleep(): compute
     * the time since next->timestamp (clamped at 0, and scaled by roughly
     * ON_RUNQUEUE_WEIGHT/100 for SLEEP_INTERACTIVE), recalculate its priority,
     * and requeue it on its array if the priority changed.
     */
    if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
        unsigned long long delta = now - next->timestamp;
        if (unlikely((long long)(now - next->timestamp) < 0))
            delta = 0;

        if (next->sleep_type == SLEEP_INTERACTIVE)
            delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;

        array = next->array;
        new_prio = recalc_task_prio(next, next->timestamp + delta);

        if (unlikely(next->prio != new_prio)) {
            dequeue_task(next, array);
            next->prio = new_prio;
            enqueue_task(next, array);
        }
    }
    next->sleep_type = SLEEP_NORMAL;
    /* If dependent_sleeper() returns nonzero, run the idle task instead. */
    if (dependent_sleeper(cpu, rq, next))
        next = rq->idle;

    /* The actual task switch starts here. */

switch_tasks: 
    if (next == rq->idle)
        schedstat_inc(rq, sched_goidle);
    prefetch(next);
    prefetch_stack(next);
    clear_tsk_need_resched(prev); 
    // #define TIF_NEED_RESCHED    2

    clear_tsk_need_resched_delayed(prev);
    rcu_qsctr_inc(task_cpu(prev));

    update_cpu_clock(prev, rq, now);

    /* Charge run_time against prev's sleep_avg, never letting it go below 0. */
    prev->sleep_avg -= run_time;
    if ((long)prev->sleep_avg <= 0)
        prev->sleep_avg = 0;
    prev->timestamp = prev->last_ran = now;

    trace_all_runnable_tasks(rq);

    sched_info_switch(prev, next);

    /* Perform the switch only when a different task was selected. */
    if (likely(prev != next)) {
        /* Update per-runqueue and per-task bookkeeping. */
        next->timestamp = now;
        rq->nr_switches++;
        rq->curr = next;
        ++*switch_count;

        prepare_task_switch(rq, next);
        MARK(kernel_sched_schedule, "%d %d %ld", 
             prev->pid, next->pid, prev->state);
        prev = context_switch(rq, prev, next);
        barrier();
        trace_special_pid(prev->pid, PRIO(prev), PRIO(current));

        /*
         * this_rq must be evaluated again because prev may have moved
         * CPUs since it called schedule(), thus the 'rq' on its stack
         * frame will be invalid.
         */
        /*
         * NOTE(review): rq->lock is not released explicitly on this path;
         * presumably finish_task_switch() does it -- confirm.
         */
        finish_task_switch(this_rq(), prev);
        __preempt_enable_no_resched();
    } else {
        /*
         * prev was selected again: no context switch.
         * NOTE(review): the lock was taken with spin_lock_irq() but is
         * dropped with plain spin_unlock(); interrupts are presumably
         * re-enabled by the caller -- confirm.
         */
        __preempt_enable_no_resched();
        spin_unlock(&rq->lock);
        trace_stop_sched_switched(next);
    }

    reacquire_kernel_lock(current);
}

-----接下来看看如何从 rq 里面删除进程。

//这里的两个函数很关键:把一个进程从就绪队列里面删除时,调用的就是这两个函数。
从中我们可以看出,schedule() 函数发现当前进程的状态是 TASK_INTERRUPTIBLE 并且有未决信号等待处理时(也就是它收到了一个信号,signal_pending(prev) 返回真),就把当前进程的状态重新置为 TASK_RUNNING,并且不调用 deactivate_task(),所以该进程仍然留在就绪队列里。但是请注意,这仅表示 scheduler 仍有可能再次调度这个进程而已;这一次被选中的通常是另外一个进程,不过如果它仍然排在优先级最高的队列的最前面,也可能再次选中它自己(即代码中 prev == next 的情况)。



/*
* Adding/removing a task to/from a priority array:
*/

/*
 * dequeue_task - unlink task @p from priority array @array.
 *
 * @p:     task to remove; it is expected to sit on the list for priority
 *         p->prio inside @array.
 * @array: the priority array that currently holds @p.
 *
 * Decrements the array's active count, unlinks the task from its run list,
 * clears the priority's bit in the bitmap when that list becomes empty, and
 * finally calls dec_rt_tasks() for the array's runqueue.
 */
static void dequeue_task(struct task_struct *p, struct prio_array *array)
{
    list_del(&p->run_list);
    array->nr_active -= 1;

    /* The list for this priority is now empty: clear its bit in the bitmap. */
    if (list_empty(&array->queue[p->prio])) {
        __clear_bit(p->prio, array->bitmap);
    }

    dec_rt_tasks(p, array->rq);
}


/*
* deactivate_task - remove a task from the runqueue.
*/

/*
 * Take task @p off runqueue @rq.
 *
 * @p:  task to remove; p->array names the priority array it is queued on.
 * @rq: runqueue whose running count is decremented.
 *
 * Emits a trace record, decrements the runqueue's running count, dequeues
 * the task from its priority array and then clears p->array.
 */
static void deactivate_task(struct task_struct *p, struct rq *rq)
{
    trace_special_pid(p->pid, PRIO(p), rq->nr_running);

    dec_nr_running(p, rq);

    /* Dequeue from the array the task is on, then record that it is on none. */
    dequeue_task(p, p->array);
    p->array = (NULL);
}

====

http://blogold.chinaunix.net/u/22617/showart_384393.html

阅读(1245) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~