Category: LINUX

2010-12-29 14:53:05

 
 

Basic Principles of the Linux Kernel Preemption Patch

When the CPU runs in the kernel it is not unpreemptible everywhere: kernel execution contains gaps in which preemption is safe. The basic idea of the preemption patch is to treat exactly those code regions that may run in parallel under SMP as regions in which preemption is also allowed.

The 2.4 kernel had already refined its thread-synchronization machinery for multiple CPUs, marking every block of instructions that must not run in parallel with spinlocks and rwlocks, so the patch's implementation follows almost for free. Concretely, a preempt_count field is added to the process's task structure as a preemption lock; it is incremented and decremented together with every spinlock and rwlock lock/unlock. When preempt_count is 0, rescheduling is allowed. The preemption entry point is preempt_schedule(), which marks the current process with the TASK_PREEMPTED state and then calls schedule(); while TASK_PREEMPTED is set, schedule() does not remove the process from the run queue.
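To make that rule concrete before reading the patch itself, here is a minimal C-style sketch of the idea. It is illustrative only: preempt_disable()/preempt_enable() are hypothetical stand-ins for the ctx_sw_off()/ctx_sw_on() macros shown further below, and the real patch keeps the counter as an atomic_t and manipulates it with atomic operations.

/* Illustrative sketch, not the patch source: the preemption lock is just a
 * per-task counter that spinlocks bump on entry and drop on exit. */
#define preempt_disable()  (current->preempt_count++)
#define preempt_enable()                                        \
        do {                                                    \
                if (--current->preempt_count == 0 &&            \
                    current->need_resched)                      \
                        preempt_schedule();  /* safe point */   \
        } while (0)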

Below is an outline of the main preemption code:

arch/i386/kernel/entry.S:

preempt_count = 4                       # reuse the slot of task_struct's flags field as
                                        # preempt_count; flags itself has been moved elsewhere

ret_from_exception:                     # return from an exception
#ifdef CONFIG_SMP
    GET_CURRENT(%ebx)
    movl processor(%ebx),%eax
    shll $CONFIG_X86_L1_CACHE_SHIFT,%eax
    movl SYMBOL_NAME(irq_stat)(,%eax),%ecx      # softirq_active
    testl SYMBOL_NAME(irq_stat)+4(,%eax),%ecx   # softirq_mask
#else
    movl SYMBOL_NAME(irq_stat),%ecx             # softirq_active
    testl SYMBOL_NAME(irq_stat)+4,%ecx          # softirq_mask
#endif
    jne handle_softirq
#ifdef CONFIG_PREEMPT
    cli
    incl preempt_count(%ebx)            # the exception entry has no instruction that
                                        # disables preemption, so take the lock here to
                                        # pair with the decrement in ret_from_intr
#endif

ENTRY(ret_from_intr)                    # return from a hardware interrupt
    GET_CURRENT(%ebx)
#ifdef CONFIG_PREEMPT
    cli
    decl preempt_count(%ebx)            # drop the preemption lock taken at interrupt entry
#endif
    movl EFLAGS(%esp),%eax              # mix EFLAGS and CS
    movb CS(%esp),%al
    testl $(VM_MASK | 3),%eax           # return to VM86 mode or non-supervisor?
    jne ret_with_reschedule
#ifdef CONFIG_PREEMPT
    cmpl $0,preempt_count(%ebx)
    jnz restore_all                     # a non-zero preempt_count means preemption is disabled
    cmpl $0,need_resched(%ebx)
    jz restore_all                      # nothing is waiting to be scheduled
    movl SYMBOL_NAME(irq_stat)+irq_stat_local_bh_count CPU_INDX,%ecx
    addl SYMBOL_NAME(irq_stat)+irq_stat_local_irq_count CPU_INDX,%ecx
    jnz restore_all                     # still inside an interrupt or bottom half
    incl preempt_count(%ebx)
    sti
    call SYMBOL_NAME(preempt_schedule)
    jmp ret_from_intr                   # the preempted task resumes through ret_from_intr,
                                        # which restores the preemption count before returning
#else
    jmp restore_all
#endif

    ALIGN
handle_softirq:
#ifdef CONFIG_PREEMPT
    cli
    GET_CURRENT(%ebx)
    incl preempt_count(%ebx)
    sti
#endif
    call SYMBOL_NAME(do_softirq)
    jmp ret_from_intr

    ALIGN
reschedule:
    call SYMBOL_NAME(schedule)          # test
    jmp ret_from_sys_call
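The #ifdef CONFIG_PREEMPT block in ret_from_intr above makes a single decision: preempt only if the preemption count is zero, need_resched is set, and the CPU is not inside an interrupt or bottom-half handler. The following is a rough C restatement of that check, illustrative only: irq_count() and bh_count() are hypothetical stand-ins for the local_irq_count/local_bh_count values read from irq_stat, and the counter is treated as a plain integer.

/* Illustrative restatement of the ret_from_intr check, not kernel source. */
static void maybe_preempt_on_irq_return(void)
{
        if (current->preempt_count != 0)        /* preemption locked out */
                return;
        if (!current->need_resched)             /* nobody wants the CPU */
                return;
        if (irq_count() + bh_count() != 0)      /* still in irq/softirq context */
                return;

        current->preempt_count++;               /* block nested preemption */
        local_irq_enable();                     /* the "sti" above */
        preempt_schedule();                     /* the decrement happens when the
                                                   task resumes via ret_from_intr */
}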

include/asm/hw_irq.h:

...
#ifdef CONFIG_PREEMPT
#define BUMP_CONTEX_SWITCH_LOCK \
        GET_CURRENT \
        "incl 4(%ebx)\n\t"
#else
#define BUMP_CONTEX_SWITCH_LOCK
#endif

#define SAVE_ALL /* save the register context at hardware-interrupt entry */ \
        "cld\n\t" \
        "pushl %es\n\t" \
        "pushl %ds\n\t" \
        "pushl %eax\n\t" \
        "pushl %ebp\n\t" \
        "pushl %edi\n\t" \
        "pushl %esi\n\t" \
        "pushl %edx\n\t" \
        "pushl %ecx\n\t" \
        "pushl %ebx\n\t" \
        "movl $" STR(__KERNEL_DS) ",%edx\n\t" \
        "movl %edx,%ds\n\t" \
        "movl %edx,%es\n\t" \
        BUMP_CONTEX_SWITCH_LOCK /* the hardware-interrupt entry disables preemption */

include/linux/spinlock.h:

#ifdef CONFIG_PREEMPT
#define switch_lock_count()     current->preempt_count

/* is the current process's preemption count non-zero? */
#define in_ctx_sw_off()         (switch_lock_count().counter)

#define atomic_ptr_in_ctx_sw_off()  (&switch_lock_count())

/* disable preemption: increment the current process's preemption count */
#define ctx_sw_off() \
        do { \
                atomic_inc(atomic_ptr_in_ctx_sw_off()); \
        } while (0)

/* re-enable preemption without rescheduling: decrement the count */
#define ctx_sw_on_no_preempt() \
        do { \
                atomic_dec(atomic_ptr_in_ctx_sw_off()); \
        } while (0)

/* re-enable preemption and perform it if one is pending */
#define ctx_sw_on() \
        do { \
                if (atomic_dec_and_test(atomic_ptr_in_ctx_sw_off()) && \
                    current->need_resched) \
                        preempt_schedule(); \
        } while (0)

#define spin_lock(lock) \
        do { \
                ctx_sw_off();   /* entering a spinlock disables preemption */ \
                _raw_spin_lock(lock); \
        } while (0)

/* take the lock and report whether it was free; on failure re-enable preemption */
#define spin_trylock(lock)      ({ctx_sw_off(); _raw_spin_trylock(lock) ? \
                                        1 : ({ctx_sw_on(); 0;});})

#define spin_unlock(lock) \
        do { \
                _raw_spin_unlock(lock); \
                ctx_sw_on();    /* leaving the spinlock re-enables and performs preemption */ \
        } while (0)

#define read_lock(lock)         ({ctx_sw_off(); _raw_read_lock(lock);})
#define read_unlock(lock)       ({_raw_read_unlock(lock); ctx_sw_on();})
#define write_lock(lock)        ({ctx_sw_off(); _raw_write_lock(lock);})
#define write_unlock(lock)      ({_raw_write_unlock(lock); ctx_sw_on();})
#define write_trylock(lock)     ({ctx_sw_off(); _raw_write_trylock(lock) ? \
                                        1 : ({ctx_sw_on(); 0;});})
...
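With these wrappers in place, every existing spinlock-protected critical section automatically becomes a non-preemptible region, with no change to driver code. A hypothetical usage example (my_lock, my_count and my_increment are illustrative names, not from the patch):

static spinlock_t my_lock = SPIN_LOCK_UNLOCKED;
static int my_count;

void my_increment(void)
{
        spin_lock(&my_lock);    /* ctx_sw_off(): preempt_count++ */
        my_count++;             /* this section cannot be preempted */
        spin_unlock(&my_lock);  /* ctx_sw_on(): preempt_count--, and
                                   preempt_schedule() if a reschedule is pending */
}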

include/asm/softirq.h:

#define cpu_bh_disable(cpu) \
        do { ctx_sw_off(); local_bh_count(cpu)++; barrier(); } while (0)
#define cpu_bh_enable(cpu) \
        do { barrier(); local_bh_count(cpu)--; ctx_sw_on(); } while (0)

kernel/sched.c:

#ifdef CONFIG_PREEMPT
asmlinkage void preempt_schedule(void)
{
        while (current->need_resched) {
                ctx_sw_off();
                current->state |= TASK_PREEMPTED;
                schedule();
                current->state &= ~TASK_PREEMPTED;
                ctx_sw_on_no_preempt();
        }
}
#endif

asmlinkage void schedule(void)
{
        struct schedule_data * sched_data;
        struct task_struct *prev, *next, *p;
        struct list_head *tmp;
        int this_cpu, c;

#ifdef CONFIG_PREEMPT
        ctx_sw_off();
#endif
        if (!current->active_mm) BUG();

need_resched_back:
        prev = current;
        this_cpu = prev->processor;

        if (in_interrupt())
                goto scheduling_in_interrupt;

        release_kernel_lock(prev, this_cpu);

        /* Do "administrative" work here while we don't hold any locks */
        if (softirq_active(this_cpu) & softirq_mask(this_cpu))
                goto handle_softirq;
handle_softirq_back:

        /*
         * 'sched_data' is protected by the fact that we can run
         * only one process per CPU.
         */
        sched_data = &aligned_data[this_cpu].schedule_data;

        spin_lock_irq(&runqueue_lock);

        /* move an exhausted RR process to be last.. */
        if (prev->policy == SCHED_RR)
                goto move_rr_last;
move_rr_back:

        switch (prev->state) {
                case TASK_INTERRUPTIBLE:
                        if (signal_pending(prev)) {
                                prev->state = TASK_RUNNING;
                                break;
                        }
                default:
#ifdef CONFIG_PREEMPT
                        if (prev->state & TASK_PREEMPTED)
                                break;  /* a kernel-preempted task stays on the run queue */
#endif
                        del_from_runqueue(prev);
#ifdef CONFIG_PREEMPT
                case TASK_PREEMPTED:
#endif
                case TASK_RUNNING:;
        }

        prev->need_resched = 0;

        /*
         * this is the scheduler proper:
         */

repeat_schedule:
        /*
         * Default process to select..
         */
        next = idle_task(this_cpu);
        c = -1000;
        if (task_on_runqueue(prev))
                goto still_running;

still_running_back:
        list_for_each(tmp, &runqueue_head) {
                p = list_entry(tmp, struct task_struct, run_list);
                if (can_schedule(p, this_cpu)) {
                        int weight = goodness(p, this_cpu, prev->active_mm);
                        if (weight > c)
                                c = weight, next = p;
                }
        }

        /* Do we need to re-calculate counters? */
        if (!c)
                goto recalculate;

        /*
         * from this point on nothing can prevent us from
         * switching to the next task, save this fact in
         * sched_data.
         */
        sched_data->curr = next;
#ifdef CONFIG_SMP
        next->has_cpu = 1;
        next->processor = this_cpu;
#endif
        spin_unlock_irq(&runqueue_lock);

        if (prev == next)
                goto same_process;

#ifdef CONFIG_SMP
        /*
         * maintain the per-process 'last schedule' value.
         * (this has to be recalculated even if we reschedule to
         * the same process) Currently this is only used on SMP,
         * and it's approximate, so we do not have to maintain
         * it while holding the runqueue spinlock.
         */
        sched_data->last_schedule = get_cycles();

        /*
         * We drop the scheduler lock early (it's a global spinlock),
         * thus we have to lock the previous process from getting
         * rescheduled during switch_to().
         */
#endif /* CONFIG_SMP */

        kstat.context_swtch++;
        /*
         * there are 3 processes which are affected by a context switch:
         *
         * prev == .... ==> (last => next)
         *
         * It's the 'much more previous' 'prev' that is on next's stack,
         * but prev is set to (the just run) 'last' process by switch_to().
         * This might sound slightly confusing but makes tons of sense.
         */

        prepare_to_switch();
        {
                struct mm_struct *mm = next->mm;
                struct mm_struct *oldmm = prev->active_mm;
                if (!mm) {
                        if (next->active_mm) BUG();
                        next->active_mm = oldmm;
                        atomic_inc(&oldmm->mm_count);
                        enter_lazy_tlb(oldmm, next, this_cpu);
                } else {
                        if (next->active_mm != mm) BUG();
                        switch_mm(oldmm, mm, next, this_cpu);
                }

                if (!prev->mm) {
                        prev->active_mm = NULL;
                        mmdrop(oldmm);
                }
        }

        /*
         * This just switches the register state and the
         * stack.
         */
        switch_to(prev, next, prev);
        __schedule_tail(prev);

same_process:
        reacquire_kernel_lock(current);
        if (current->need_resched)
                goto need_resched_back;

#ifdef CONFIG_PREEMPT
        ctx_sw_on_no_preempt();
#endif
        return;

recalculate:
        {
                struct task_struct *p;
                spin_unlock_irq(&runqueue_lock);
                read_lock(&tasklist_lock);
                for_each_task(p)
                        p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
                read_unlock(&tasklist_lock);
                spin_lock_irq(&runqueue_lock);
        }
        goto repeat_schedule;

still_running:
        c = goodness(prev, this_cpu, prev->active_mm);
        next = prev;
        goto still_running_back;

handle_softirq:
        do_softirq();
        goto handle_softirq_back;

move_rr_last:
        if (!prev->counter) {
                prev->counter = NICE_TO_TICKS(prev->nice);
                move_last_runqueue(prev);
        }
        goto move_rr_back;

scheduling_in_interrupt:
        printk("Scheduling in interrupt\n");
        BUG();
        return;
}

void schedule_tail(struct task_struct *prev)
{
        __schedule_tail(prev);
#ifdef CONFIG_PREEMPT
        ctx_sw_on();
#endif
}

Source: http://net.zdnet.com.cn/network_security_zone/2007/1018/565662.shtml