进程(四)进程调度及切换-yulianliu1218-ChinaUnix博客

秋千已经荡啊荡yulianliu1218.blog.chinaunix.net

首页　| 　博文目录　| 　关于我

yulianliu1218

博客访问： 1502930
博文数量： 842
博客积分： 12411
博客等级：上将
技术积分： 5772
用户组：普通用户
注册时间： 2011-06-14 14:43

文章分类

全部博文（842）

linux（52）
java & （2）
软件测试（2）
未分配的博文（786）

文章存档

2013年（157）

2012年（685）

我的朋友

相关博文

进程(四)进程调度及切换

分类： LINUX

2013-07-26 16:33:28

原文地址：进程(四)进程调度及切换作者：wangbaolin719

一、调度策略
每个linux进程总是按照下边的调度类型被调度：
SCHED_FIFO：先进先出的实时进程。适用于时间性要求比较强，但每次运行所需的时间比较短的进程，实时的应用大都具有这样的特点。
SCHED_RR：时间片轮流的实时进程。轮流，适合比较大，每次运行需时较长的进程。
SCHED_OTHER：普通的分时进程。传统的调度政策，适合于交互式分时应用。
Linux 根据policy从整体上区分实时进程和普通进程，因为实时进程和普通进程度调度是不同的，它们两者之间，实时进程应该先于普通进程而运行，然后，对于同一类型的不同进程，采用不同的标准来选择进程：
1.普通进程调度
(1)静态优先级
每个普通进程都有它自己的静态优先级。内核从100~139表示。
静态优先级本质上决定了进程的基本时间片，即进程用完了以前的时间片时，系统分配给进程的时间片长度。
不随时间而改变，只能由用户进行修改。它指明了在被迫和其他进程竞争CPU之前，该进程所应该被允许的时间片的最大值。
若静态优先级<120:基本时间片（ms）=（140-静态优先级）*20
若静态优先级>=120:基本时间片（ms）=（140-静态优先级）*5
(2)动态优先级
普通进程除了静态优先级还有动态优先级，范围是100~139.
动态优先级是调度程序在选择新程序来运行的时候选择的数。只要进程拥有CPU，它就随着时间不断减小；当它小于0时，标记进程重新调度。它指明了在这个时间片中所剩余的时间量。
动态优先级=max(100,min(静态优先级-bonus+5,139))
2.实时进程的调度
每个实时进程都与一个实时优先级相关，范围从1~99表示。
与普通进程相反，实时进程总是被当成活动进程。
调度程序总是让优先级高的进程运行，发生以下情况实时进程才会被另外一个进程取代：
(1)进程被另外一个具有更高实时优先级的实时进程抢占。
(2)进程执行了阻塞操作并进入睡眠。
(3)进程停止或被杀死。
(4)进程通过系统调用自动放弃CPU。
(5)进程是基于时间片轮转的实时进程(SCHED_RR)，而且用完了时间片。
二、调度程序使用的数据结构
数据结构runqueue是linux2.6调度程序最重要的数据结构。
系统中每个cpu都有它自己的运行队列，所有的runqueue结构存放在runqueues每cpu变量中。
struct runqueue {
spinlock_t lock;//自旋锁，用于锁定运行队列
unsigned long nr_running;//可运行的的任务数目
unsigned long cpu_load[3];//基于运行队列中进程的平均数量的cpu负载因子
unsigned long long nr_switches;//上下文切换数目
unsigned long nr_uninterruptible;//处于不可中断或者睡眠状态的任务数目
unsigned long expired_timestamp;//队列最后被换出的时间
unsigned long long timestamp_last_tick;//最后一个调度程序的节拍
task_t *curr, *idle;//当前运行的任务和空闲的任务
struct mm_struct *prev_mm;//最后运行任务的mm_struct结构体。
prio_array_t *active, *expired, arrays[2];//1.活动优先级队列2.超时优先级队列，3.实际优先级数组
int best_expired_prio;//最适宜换出的优先级队列
atomic_t nr_iowait;//等待I/O操作的任务数目
struct sched_domain *sd;//多处理器相关
int active_balance;//活动的需要负载平衡进程个数
int push_cpu;//哪个CPU需要进行负载平衡？
task_t *migration_thread;//换出任务
struct list_head migration_queue;//换出队列
/* latency stats */
struct sched_info rq_sched_info;
/* sys_sched_yield() stats */
unsigned long yld_exp_empty;
unsigned long yld_act_empty;
unsigned long yld_both_empty;
unsigned long yld_cnt;
/* schedule() stats */
unsigned long sched_switch;
unsigned long sched_cnt;
unsigned long sched_goidle;
/* try_to_wake_up() stats */
unsigned long ttwu_cnt;
unsigned long ttwu_local;
};
三、实际代码分析
//schedule只能由进程在内核中主动调用，或者在当前进程从系统空间返回到用户空间的前夕被动的调用。而不能在一个中断服务程序中调用
asmlinkage void schedule(void)
{
struct schedule_data * sched_data;
struct task_struct *prev, *next, *p;
struct list_head *tmp;
int this_cpu, c;
if (!current->active_mm) BUG();//内存描述符不为空
need_resched_back:
prev = current;
this_cpu = prev->processor;//当前进程所在的处理器
if (in_interrupt())//调度不能发生在中断服务程序中
goto scheduling_in_interrupt;
release_kernel_lock(prev, this_cpu);//空语句
//检查是否有内核软中断服务请求在等待
if (softirq_active(this_cpu) & softirq_mask(this_cpu))
goto handle_softirq;//有就转去执行软中断服务请求
handle_softirq_back:
//将当前cpu的schedule_data信息保存
sched_data = & aligned_data[this_cpu].schedule_data;
//锁住队列
spin_lock_irq(&runqueue_lock);
if (prev->policy == SCHED_RR)//若调度政策为SCHED_RR，即轮流政策
goto move_rr_last;//转去检查时间配额是否为0，为0就重新分配，然后再跳转回来
move_rr_back:
switch (prev->state) {//查询当前进程状态
case TASK_INTERRUPTIBLE://可中断的睡眠状态
if (signal_pending(prev)) {//有信号到来
prev->state = TASK_RUNNING;//设置为可运行状态
break;
}
default://深度睡眠
del_from_runqueue(prev);//将当前进程从可执行队列中删除
case TASK_RUNNING:
}
prev->need_resched = 0;//调度标志清0
repeat_schedule://挑选一个进程来运行
next = idle_task(this_cpu);//从0号进程开始
c = -1000;//权值设为-1000
if (prev->state == TASK_RUNNING)//当前进程状态为可运行状态
goto still_running;//转去计算当前进程的权值，并把最优进程设置为当前进程，然后跳转回来接着往下找是否还有更优的进程
still_running_back:
list_for_each(tmp, &runqueue_head) {//遍历可执行队列中的所有进程
p = list_entry(tmp, struct task_struct, run_list);//从队列中依次取出进程的task_struct结构
if (can_schedule(p, this_cpu)) {//取出的进程是否允许调度
int weight = goodness(p, this_cpu, prev->active_mm);//计算出取出进程的权值
if (weight > c)//是否大于C
c = weight, next = p;//最优进程next改成权值最大的进程
}
}
if (!c)//若c为0，表明可运行队列中的所有进程的权值都是0，且都是SCHED_OTHER的进程
goto recalculate;//转去重新计算各个进程的时间配额
sched_data->curr = next;
#ifdef CONFIG_SMP
next->has_cpu = 1;//表明已经在某一个cpu上开始运行
next->processor = this_cpu;//最优进程运行的处理器号
#endif
spin_unlock_irq(&runqueue_lock);
if (prev == next)//若挑选出来的进程和当前进程一致
goto same_process;//不需切换，退出
kstat.context_swtch++;
prepare_to_switch();//空语句
struct mm_struct *mm = next->mm;
struct mm_struct *oldmm = prev->active_mm;
if (!mm) {//最优进程若是内核线程，不需要切换进程的虚拟空间，
if (next->active_mm) BUG();
next->active_mm = oldmm;//内核线程使用当前进程的内核空间
atomic_inc(&oldmm->mm_count);//当前进程的内核空间使用计数递加
enter_lazy_tlb(oldmm, next, this_cpu);
} else {//最优进程非内核线程
if (next->active_mm != mm) BUG();
switch_mm(oldmm, mm, next, this_cpu);//虚拟空间的切换
}
if (!prev->mm) {//当前进程是内核线程
prev->active_mm = NULL;//借来的内存描述符清空
mmdrop(oldmm);//递减共享计数
}
switch_to(prev, next, prev);//进程的切换，主要是堆栈的切换
__schedule_tail(prev);//将当前进程的SCHED_YIELD标志清0
650
same_process:
reacquire_kernel_lock(current);
if (current->need_resched)//调度标志又为1
goto need_resched_back;//转回重新调度
return;//成功返回
recalculate:
{
struct task_struct *p;
spin_unlock_irq(&runqueue_lock);
read_lock(&tasklist_lock);
for_each_task(p)//遍历所有进程
p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);//根据静态优先级nice重新分配时间配额
read_unlock(&tasklist_lock);
spin_lock_irq(&runqueue_lock);
}
goto repeat_schedule;
still_running:
c = goodness(prev, this_cpu, prev->active_mm);//计算出当前进程的权值
next = prev;//将当前进程暂时设置为最优进程，也就是说下边的查找会以当前进程的权值做起点
goto still_running_back;
handle_softirq:
do_softirq();//处理软中断请求
goto handle_softirq_back;
move_rr_last:
if (!prev->counter) {//当前进程的时间配额为0
prev->counter = NICE_TO_TICKS(prev->nice);//重新分配时间按配额
move_last_runqueue(prev);//加入到可执行进程队列末尾
}
goto move_rr_back;
scheduling_in_interrupt:
printk("Scheduling in interrupt\n");
BUG();
return;
}
#if HZ < 200
#define TICK_SCALE(x) ((x) >> 2)
#elif HZ < 400
#define TICK_SCALE(x) ((x) >> 1)
#elif HZ < 800
#define TICK_SCALE(x) (x)
#elif HZ < 1600
#define TICK_SCALE(x) ((x) << 1)
#else
#define TICK_SCALE(x) ((x) << 2)
#endif
#define NICE_TO_TICKS(nice) (TICK_SCALE(20-(nice))+1)
static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
{
int weight;
weight = -1;
if (p->policy & SCHED_YIELD)//若进程设置了”礼让“，则权值设为weight = -1返回
goto out;
if (p->policy == SCHED_OTHER) {//对于没有实时要求的进程
weight = p->counter;//权值设为剩下的时间配额
if (!weight)//若剩下的时间配额为0，则权值weight = 0返回
goto out;
#ifdef CONFIG_SMP
if (p->processor == this_cpu)
weight += PROC_CHANGE_PENALTY;
#endif
if (p->mm == this_mm || !p->mm)//若是内核线程且其用户空间与当前进程相同，无需切换用户空间
weight += 1;//得到一点奖励
weight += 20-p->nice;//20-p->nice范围为1-40，nice为谦让程度，nice为非实时进程的静态优先级
goto out;//返回weight
}
//对于有实时要求的进程，即调度政策为SCHED_FIFO 或者SCHED_RR的进程
weight = 1000 + p->rt_priority;//加上实时优先级（动态优先级）
out:
return weight;
}
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk,unsigned cpu)
{
if (prev != next) {
clear_bit(cpu, &prev->cpu_vm_mask);
if (prev->context.segments != next->context.segments)
load_LDT(next);
#ifdef CONFIG_SMP
cpu_tlbstate[cpu].state = TLBSTATE_OK;
cpu_tlbstate[cpu].active_mm = next;
#endif
set_bit(cpu, &next->cpu_vm_mask);
//将新进程的页面目录的起始地址装入cr3中
asm volatile("movl %0,%%cr3": :"r" (__pa(next->pgd)));
}
}
#define switch_to(prev,next,last) do { \
16 asm volatile("pushl %%esi\n\t" \//当前进程prev的寄存器入栈
17 "pushl %%edi\n\t" \
18 "pushl %%ebp\n\t" \
19 "movl %%esp,%0\n\t" /* save ESP */ \//将当前进程prev系统空间堆栈指针存入prev->thread.esp
20 "movl %3,%%esp\n\t" /* restore ESP */ \//将新受到调度要进入运行的进程next的系统空间堆栈指针next->thread.esp存入ESP
//从21行开始，已经切换了堆栈，现在使用的是next进程的堆栈
21 "movl $1f,%1\n\t" /* save EIP */ \//将25行pop指令所在的地址保存在prev->thread.eip
22 "pushl %4\n\t" /* restore EIP */ \//将next->thread.eip压入堆栈，next->thread.eip正是进程next在上一次被调离时在21行保存的，指向1
23 "jmp __switch_to\n" \//从__switch_to返回时，执行那里的ret指令，由于是jmp指令转过去的，最后进入堆栈的
//next->thread.eip变成了返回地址，也就是标号1所在的地址，也就是25行指令所在的地址
24 "1:\t" \
25 "popl %%ebp\n\t" \//恢复新切入的进程next在上一次被调离时push进堆栈的内容，前边堆栈已经切换了
26 "popl %%edi\n\t" \
27 "popl %%esi\n\t" \
28 :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \
29 "=b" (last) \
30 :"m" (next->thread.esp),"m" (next->thread.eip), \
31 "a" (prev), "d" (next), \
32 "b" (prev)); \
} while (0)
void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
struct tss_struct *tss = init_tss + smp_processor_id();
unlazy_fpu(prev_p);
tss->esp0 = next->esp0;//TSS内核的内核空间堆栈指针设为要调度运行的进程的堆栈指针
asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs));
asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
loadsegment(fs, next->fs);
loadsegment(gs, next->gs);
if (next->debugreg[7]){
loaddebug(next, 0);
loaddebug(next, 1);
loaddebug(next, 2);
loaddebug(next, 3);
loaddebug(next, 6);
loaddebug(next, 7);
}
if (prev->ioperm || next->ioperm) {//有I/O位图
if (next->ioperm) {
memcpy(tss->io_bitmap, next->io_bitmap,IO_BITMAP_SIZE*sizeof(unsigned long));//拷贝要调度运行的进程的IO位位图
tss->bitmap = IO_BITMAP_OFFSET;
} else
tss->bitmap = INVALID_IO_BITMAP_OFFSET;
}
}
//////////////////////////////////////////////////////////////////////////////////////////////
/*************************************强制性调度*********************************************/
//内核中的强制性调度，也就是非自愿的、被动的剥夺式的调度，主要是由时间引起的，这种调度发生在
//进程从系统空间返回到用户空间的前夕。如下三种情况：
//1.在时钟中断的服务程序中，发现当前进程运行时间过长
//2.当唤醒一个睡眠中的进程时，发现被唤醒的进程比当前进程更有资格运行。
//3.一个进程通过系统调用改变调度政策或礼让。
//第一种情况：
[do_timer_interrupt()>do_timer()>update_process_times()]
void update_process_times(int user_tick)//调整当前进程与时间有关的一些参数
{
581 struct task_struct *p = current;
582 int cpu = smp_processor_id(), system = user_tick ^ 1;
583
584 update_one_process(p, user_tick, system, cpu);//统计信息
585 if (p->pid) {//0号进程，时间配额不能递减
586 if (--p->counter <= 0) {//时间配额减为0或小于0
587 p->counter = 0;//设为0
588 p->need_resched = 1;//设置调度标志
589 }
590 if (p->nice > 0)
591 kstat.per_cpu_nice[cpu] += user_tick;
592 else
593 kstat.per_cpu_user[cpu] += user_tick;
594 kstat.per_cpu_system[cpu] += system;
595 } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
596 kstat.per_cpu_system[cpu] += system;
}
//第二种情况：
inline void wake_up_process(struct task_struct * p)
{
331 unsigned long flags;
332
336 spin_lock_irqsave(&runqueue_lock, flags);
337 p->state = TASK_RUNNING;
338 if (task_on_runqueue(p))//当前唤醒进程是否已经在cpu的可运行队列runqueue
339 goto out;
340 add_to_runqueue(p);//挂入cpu的可运行队列runqueue
341 reschedule_idle(p);
out:
343 spin_unlock_irqrestore(&runqueue_lock, flags);
}
static void reschedule_idle(struct task_struct * p)
{
#ifdef CONFIG_SMP
#else /* UP */
287 int this_cpu = smp_processor_id();
288 struct task_struct *tsk;
289 //#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
290 tsk = cpu_curr(this_cpu);
291 if (preemption_goodness(tsk, p, this_cpu) > 1)//将当前运行进程和唤醒进程比较
292 tsk->need_resched = 1;//唤醒的进程权值高，就设置调度标志
#endif
}
static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu)
{
return goodness(p, cpu, prev->active_mm)-goodness(prev, cpu, prev->active_mm);
}
//第三种情况：自愿让出
asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,struct sched_param *param)
{
return setscheduler(pid, policy, param);
}
asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param *param)
{
return setscheduler(pid, -1,param);
}
//改变进程的调度政策
static int setscheduler(pid_t pid, int policy,struct sched_param *param)
{
890 struct sched_param lp;
891 struct task_struct *p;
892 int retval;
893
894 retval = -EINVAL;
895 if (!param || pid < 0)//检查参数和PID
896 goto out_nounlock;
897
898 retval = -EFAULT;
899 if (copy_from_user(&lp, param, sizeof(struct sched_param)))//从用户空间拷贝到内核
900 goto out_nounlock;
901
905 read_lock_irq(&tasklist_lock);
906 spin_lock(&runqueue_lock);
907 //根据PID找到进程的task_struct结构
908 p = find_process_by_pid(pid);
909
910 retval = -ESRCH;
911 if (!p)
912 goto out_unlock;
913
914 if (policy < 0)
915 policy = p->policy;
916 else {
917 retval = -EINVAL;
918 if (policy != SCHED_FIFO && policy != SCHED_RR &&policy != SCHED_OTHER)//调度政策必须为这三种
920 goto out_unlock;
921 }
922
927 retval = -EINVAL;
928 if (lp.sched_priority < 0 || lp.sched_priority > 99)
929 goto out_unlock;
930 if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
931 goto out_unlock;
932
933 retval = -EPERM;
934 if ((policy == SCHED_FIFO || policy == SCHED_RR) &&!capable(CAP_SYS_NICE))//是否有权限
936 goto out_unlock;
937 if ((current->euid != p->euid) && (current->euid != p->uid) &&!capable(CAP_SYS_NICE))
939 goto out_unlock;
940
941 retval = 0;
942 p->policy = policy;//设置新的调度政策
943 p->rt_priority = lp.sched_priority;
944 if (task_on_runqueue(p))//检查是否在可运行队列中
945 move_first_runqueue(p);//在队列中的话，移到队列前部
946
947 current->need_resched = 1;//重新设置调度标志
948
out_unlock:
950 spin_unlock(&runqueue_lock);
951 read_unlock_irq(&tasklist_lock);
952
out_nounlock:
954 return retval;
}
//为其它进程让路，但并不进入睡眠
asmlinkage long sys_sched_yield(void)
{
1028
1029 int nr_pending = nr_running;//正在等待运行的进程数量
1030
#if CONFIG_SMP
1032 int i;
1035 for (i = 0; i < smp_num_cpus; i++)
1036 if (aligned_data[i].schedule_data.curr != idle_task(i))
1037 nr_pending--;
#else
1040 nr_pending--;
#endif
1042 if (nr_pending) {//若还有等待运行的进程
1047 if (current->policy == SCHED_OTHER)
1048 current->policy |= SCHED_YIELD;//调度政策设为“自动礼让”
1049 current->need_resched = 1;//重新设置调度标志
1050 }
1051 return 0;
}

阅读(1210) | 评论(0) | 转发(0) |

上一篇：linux fread and fwrite

下一篇：Linux之管道与重定向

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6