Chinaunix首页 | 论坛 | 博客
  • 博客访问: 2109871
  • 博文数量: 438
  • 博客积分: 3871
  • 博客等级: 中校
  • 技术积分: 6075
  • 用 户 组: 普通用户
  • 注册时间: 2011-09-10 00:11
个人简介

邮箱: wangcong02345@163.com

文章分类

全部博文(438)

文章存档

2017年(15)

2016年(119)

2015年(91)

2014年(62)

2013年(56)

2012年(79)

2011年(16)

分类: LINUX

2016-12-21 14:12:35

一.总体说明


二.代码分析
start_kernel的最后-->rest_init
  1. static void rest_init(void)
  2. {
  3.     //init=0xc0105044,flags=0x10e00
  4.     kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);   
  5.     unlock_kernel();
  6.     current->need_resched = 1;
  7.     cpu_idle();
  8. }


在arch/i386/kernel/process.c中 L488
  1. int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
  2. {
  3.     long retval, d0;

  4.     __asm__ __volatile__(
  5.         "movl %%esp,%%esi\n\t"
  6.         "int $0x80\n\t"           //第1个系统调用
  7.         "cmpl %%esp,%%esi\n\t"    /* child or parent? */
  8.         "je 1f\n\t"               /* parent - jump */
  9.         "movl %4,%%eax\n\t"
  10.         "pushl %%eax\n\t"        
  11.         "call *%5\n\t"           /* call fn */
  12.         "movl %3,%0\n\t"         /* exit */
  13.         "int $0x80\n"            //第2个系统调用
  14.         "1:\t"
  15.         :"=&a" (retval), "=&S" (d0)
  16.         :"0" (__NR_clone), "i" (__NR_exit),
  17.          "r" (arg), "r" (fn),
  18.          "b" (flags | CLONE_VM)
  19.         : "memory");
  20.     return retval;
  21. }
在第1个系统调用之前有一些准备工作
eax=__NR_clone, ebx=flags|CLONE_VM

  1. int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
  2. {
  3.     long retval, d0;
  4. //fn=init=0xc0105044,arg=NULL, flags=0x10e00
  5.     __asm__ __volatile__(
  6.         "movl __NR_clone, %eax"      //将系统调用号eax设为__NR_clone
  7.         "movl flags|CLONE_VM, %ebx"  //将flags放在ebx中
  8.         "movl %%esp,%%esi"
  9.         "int $0x80"             //第1个系统调用
  10.         "cmpl %%esp,%%esi"      //fork之后子进程会获得新esp,但父进程还是用它原先的esp,根据这一点可以区分父子进程
  11.         "je 1f "                //相等则为父进程,直接跳出 parent-->jump
  12.         
  13.         "movl %4,%%eax"         //子进程:将args压栈
  14.         "pushl %%eax"           //子进程:
  15.         "call *%5"              //子进程:调用fn=init函数
  16.         "movl %3,%0"            //子进程执行完fn=init函数后调用exit
  17.         "int $0x80"             //子进程:第2个系统调用exit
  18.         
  19.         "1:\t"
  20.         :"=&a" (retval), "=&S" (d0)
  21.         :"0" (__NR_clone), "i" (__NR_exit),
  22.          "r" (arg), "r" (fn),
  23.          "b" (flags | CLONE_VM)
  24.         : "memory");
  25.     return retval;
  26. }



在arch/i386/kernel/process.c中L747
  1. asmlinkage int sys_clone(struct pt_regs regs)
  2. {
  3.     unsigned long clone_flags;
  4.     unsigned long newsp;
  5. $7 = {ebx = 0x10f00, ecx = 0x78, edx = 0x0, esi = 0xc02e1fb8, edi = 0xc0105044, ebp = 0xe000, eax = 0x78, xds = 0x18, xes = 0x18, orig_eax = 0x78, eip = 0xc0105ac7, xcs = 0x10, 
      eflags = 0x206, esp = 0xc0105044, xss = 0x400}

  6.     clone_flags = regs.ebx;   //clone_flags=CLONE_FS|CLONE_FILES|CLONE_SIGNAL|CLONE_VM
  7.     newsp = regs.ecx;         //0x78
  8.     if (!newsp)
  9.         newsp = regs.esp;
  10.     return do_fork(clone_flags, newsp, &regs, 0);
  11. }




  1. int do_fork(unsigned long clone_flags, unsigned long stack_start,
  2.      struct pt_regs *regs, unsigned long stack_size)
  3. {
  4.     int retval;
  5.     struct task_struct *p;
  6.     struct completion vfork;

  7.     retval = -EPERM;

  8.     /*
  9.      * CLONE_PID is only allowed for the initial SMP swapper
  10.      * calls
  11.      */
  12.     if (clone_flags & CLONE_PID) {
  13.         if (current->pid)
  14.             goto fork_out;
  15.     }

  16.     retval = -ENOMEM;
  17.     //alloc_task_struct=__get_free_pages(GFP_KERNEL,1),最后的参数1,代表要分配2页内存
  18.     p = alloc_task_struct();    
  19.     if (!p)
  20.         goto fork_out;

  21.     *p = *current;

  22.     retval = -EAGAIN;
  23.     /*
  24.      * Check if we are over our maximum process limit, but be sure to
  25.      * exclude root. This is needed to make it possible for login and
  26.      * friends to set the per-user process limit to something lower
  27.      * than the amount of processes root is running. -- Rik
  28.      */
  29.     if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur
  30.      && !capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE))
  31.         goto bad_fork_free;

  32.     atomic_inc(&p->user->__count);
  33.     atomic_inc(&p->user->processes);

  34.     /*
  35.      * Counter increases are protected by
  36.      * the kernel lock so nr_threads can't
  37.      * increase under us (but it may decrease).
  38.      */
  39.     if (nr_threads >= max_threads)
  40.         goto bad_fork_cleanup_count;
  41.     
  42.     get_exec_domain(p->exec_domain);

  43.     if (p->binfmt && p->binfmt->module)
  44.         __MOD_INC_USE_COUNT(p->binfmt->module);

  45.     p->did_exec = 0;
  46.     p->swappable = 0;
  47.     p->state = TASK_UNINTERRUPTIBLE;

  48.     copy_flags(clone_flags, p);
  49.     p->pid = get_pid(clone_flags);

  50.     p->run_list.next = NULL;
  51.     p->run_list.prev = NULL;

  52.     p->p_cptr = NULL;
  53.     init_waitqueue_head(&p->wait_chldexit);
  54.     p->vfork_done = NULL;
  55.     if (clone_flags & CLONE_VFORK) {
  56.         p->vfork_done = &vfork;
  57.         init_completion(&vfork);
  58.     }
  59.     spin_lock_init(&p->alloc_lock);

  60.     p->sigpending = 0;
  61.     init_sigpending(&p->pending);

  62.     p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
  63.     p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
  64.     init_timer(&p->real_timer);
  65.     p->real_timer.data = (unsigned long) p;

  66.     p->leader = 0;        /* session leadership doesn't inherit */
  67.     p->tty_old_pgrp = 0;
  68.     p->times.tms_utime = p->times.tms_stime = 0;
  69.     p->times.tms_cutime = p->times.tms_cstime = 0;
  70. #ifdef CONFIG_SMP
  71.     {
  72.         int i;
  73.         p->cpus_runnable = ~0UL;
  74.         p->processor = current->processor;
  75.         /* ?? should we just memset this ?? */
  76.         for(i = 0; i < smp_num_cpus; i++)
  77.             p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
  78.         spin_lock_init(&p->sigmask_lock);
  79.     }
  80. #endif
  81.     p->lock_depth = -1;        /* -1 = no lock */
  82.     p->start_time = jiffies;

  83.     INIT_LIST_HEAD(&p->local_pages);

  84.     retval = -ENOMEM;
  85.     /* copy all the process information */
  86.     if (copy_files(clone_flags, p))      //因为这儿有了CLONE_FILES标志,所以不会真的分配files_struct,只是oldf->count++
  87.         goto bad_fork_cleanup;
  88.     if (copy_fs(clone_flags, p))         //因为这儿有了CLONE_FS标志,所以不会真的分配fs_struct,只是current->fs->count++
  89.         goto bad_fork_cleanup_files;
  90.     if (copy_sighand(clone_flags, p))    //因为这儿有了CLONE_SIGNAL(其中包含CLONE_SIGHAND)标志,所以不会真的分配signal_struct,只是current->sig->count++
  91.         goto bad_fork_cleanup_fs;
  92.     if (copy_mm(clone_flags, p))         //因为这儿有了CLONE_VM标志,所以不会真的分配mm_struct,只是oldmm->mm_users++
  93.         goto bad_fork_cleanup_sighand;
  94.     retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
  95.     if (retval)
  96.         goto bad_fork_cleanup_mm;
  97.     p->semundo = NULL;
  98.     
  99.     /* Our parent execution domain becomes current domain
  100.      These must match for thread signalling to apply */
  101.     
  102.     p->parent_exec_id = p->self_exec_id;

  103.     /* ok, now we should be set up.. */
  104.     p->swappable = 1;
  105.     p->exit_signal = clone_flags & CSIGNAL;
  106.     p->pdeath_signal = 0;

  107.     /*
  108.      * "share" dynamic priority between parent and child, thus the
  109.      * total amount of dynamic priorities in the system doesnt change,
  110.      * more scheduling fairness. This is only important in the first
  111.      * timeslice, on the long run the scheduling behaviour is unchanged.
  112.      */
  113.     p->counter = (current->counter + 1) >> 1;
  114.     current->counter >>= 1;
  115.     if (!current->counter)
  116.         current->need_resched = 1;

  117.     /*
  118.      * Ok, add it to the run-queues and make it
  119.      * visible to the rest of the system.
  120.      *
  121.      * Let it
  122.      */
  123.     retval = p->pid;
  124.     p->tgid = retval;
  125.     INIT_LIST_HEAD(&p->thread_group);

  126.     /* Need tasklist lock for parent etc */
  127.     write_lock_irq(&tasklist_lock);

  128.     /* CLONE_PARENT and CLONE_THREAD re-use the old parent */
  129.     p->p_opptr = current->p_opptr;
  130.     p->p_pptr = current->p_pptr;
  131.     if (!(clone_flags & (CLONE_PARENT | CLONE_THREAD))) {
  132.         p->p_opptr = current;
  133.         if (!(p->ptrace & PT_PTRACED))
  134.             p->p_pptr = current;
  135.     }

  136.     if (clone_flags & CLONE_THREAD) {
  137.         p->tgid = current->tgid;
  138.         list_add(&p->thread_group, &current->thread_group);
  139.     }

  140.     SET_LINKS(p);
  141.     hash_pid(p);
  142.     nr_threads++;
  143.     write_unlock_irq(&tasklist_lock);

  144.     if (p->ptrace & PT_PTRACED)
  145.         send_sig(SIGSTOP, p, 1);

  146.     wake_up_process(p);        /* do this last */
  147.     ++total_forks;
  148.     if (clone_flags & CLONE_VFORK)
  149.         wait_for_completion(&vfork);

  150. fork_out:
  151.     return retval;

  152. bad_fork_cleanup_mm:
  153.     exit_mm(p);
  154. bad_fork_cleanup_sighand:
  155.     exit_sighand(p);
  156. bad_fork_cleanup_fs:
  157.     exit_fs(p); /* blocking */
  158. bad_fork_cleanup_files:
  159.     exit_files(p); /* blocking */
  160. bad_fork_cleanup:
  161.     put_exec_domain(p->exec_domain);
  162.     if (p->binfmt && p->binfmt->module)
  163.         __MOD_DEC_USE_COUNT(p->binfmt->module);
  164. bad_fork_cleanup_count:
  165.     atomic_dec(&p->user->processes);
  166.     free_uid(p->user);
  167. bad_fork_free:
  168.     free_task_struct(p);
  169.     goto fork_out;
  170. }


参数p=0xf7dee000,在do_fork中新分配的2页内存用作子进程的task_struct
  1. int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
  2.     unsigned long unused, struct task_struct * p, struct pt_regs * regs)
  3. {
  4.     struct pt_regs * childregs;
  5. //这儿稍不留神就会看错-->是先转为struct pt_regs后再减1,意思是在栈顶留出一个struct pt_regs大小的空间
  6.     childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1;
  7. //先将父进程的全部regs复制过来,然后再作调整
  8.     struct_cpy(childregs, regs);    //即memcpy(childregs, regs, sizeof(*(regs)));
  9.     childregs->eax = 0;             //child的eax=0,这也就是为什么fork后子进程返回0的原因
  10.     childregs->esp = esp;           //这个esp是在sys_clone时的regs.ecx=0x78

  11.     p->thread.esp = (unsigned long) childregs;       //执行后p->thread.esp=0xf7deffc4=(p+THREAD_SIZE-sizeof(pt_regs))
  12.     p->thread.esp0 = (unsigned long) (childregs+1);  //执行后p->thread.esp0=0xf7df0000=(p+THREAD_SIZE),即将两页内存的最高端作为栈

  13.     p->thread.eip = (unsigned long) ret_from_fork;   //下次调度时子进程执行的命令是ret_from_fork

  14.     savesegment(fs,p->thread.fs);
  15.     savesegment(gs,p->thread.gs);

  16.     unlazy_fpu(current);
  17.     struct_cpy(&p->thread.i387, &current->thread.i387);

  18.     return 0;
  19. }

上图出自《linux内核情景分析》P306


阅读(1668) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~