一.总体说明
二.代码分析
start_kernel的最后-->rest_init
-
static void rest_init(void)
-
{
-
//init=0xc0105044,flags=0x10e00
-
kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
-
unlock_kernel();
-
current->need_resched = 1;
-
cpu_idle();
-
}
在arch/i386/kernel/process.c中 L488
-
int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-
{
-
long retval, d0;
-
-
__asm__ __volatile__(
-
"movl %%esp,%%esi\n\t"
-
"int $0x80\n\t" //第1个系统调用
-
"cmpl %%esp,%%esi\n\t" /* child or parent? */
-
"je 1f\n\t" /* parent - jump */
-
"movl %4,%%eax\n\t"
-
"pushl %%eax\n\t"
-
"call *%5\n\t" /* call fn */
-
"movl %3,%0\n\t" /* exit */
-
"int $0x80\n" //第2个系统调用
-
"1:\t"
-
:"=&a" (retval), "=&S" (d0)
-
:"0" (__NR_clone), "i" (__NR_exit),
-
"r" (arg), "r" (fn),
-
"b" (flags | CLONE_VM)
-
: "memory");
-
return retval;
-
}
在第1个系统调用之前有一些准备工作
eax=__NR_clone,ebx=flags|CLONE_VM(分别由内联汇编的输入约束 "0" 和 "b" 在执行 int $0x80 之前装入寄存器)
-
int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-
{
-
long retval, d0;
-
//fn=init=0xc0105044,arg=NULL, flags=0x10e00
-
__asm__ __volatile__(
-
“movl _NR_clone, %eax" //将系统调用号eax设为NR_clone
-
"movl flags|CLONE_VM, %ebx" //将flags放在ebx中
-
"movl %%esp,%%esi"
-
"int $0x80" //第1个系统调用
-
"cmpl %%esp,%%esi" //fork之后子进程会获得新esp,但父进程还是用它原先的esp,根据这一点可以区分父子进程
-
"je 1f " //相等则为父进程,直接跳出 parent-->jump
-
-
"movl %4,%%eax" //子进程:将args压栈
-
"pushl %%eax" //子进程:
-
"call *%5" //子进程:调用fn=init函数
-
"movl %3,%0" //子进程执行完fn=init函数后调用exit
-
"int $0x80" //子进程:第2个系统调用exit
-
-
"1:\t"
-
:"=&a" (retval), "=&S" (d0)
-
:"0" (__NR_clone), "i" (__NR_exit),
-
"r" (arg), "r" (fn),
-
"b" (flags | CLONE_VM)
-
: "memory");
-
return retval;
-
}
-
在arch/i386/kernel/process.c中L747
-
/*
 * sys_clone - entry point for the clone system call.
 *
 * Takes the clone flags from regs.ebx and the new stack pointer from
 * regs.ecx (falling back to regs.esp when regs.ecx is zero), then hands
 * everything to do_fork() with a stack_size of 0.
 *
 * Returns whatever do_fork() returns.
 */
asmlinkage int sys_clone(struct pt_regs regs)
{
	unsigned long clone_flags;
	unsigned long newsp;

	/*
	 * gdb dump of regs captured by the author at this point of the traced
	 * call (kernel_thread(init, NULL, 0x10e00)):
	 *
	 * $7 = {ebx = 0x10f00, ecx = 0x78, edx = 0x0, esi = 0xc02e1fb8,
	 *       edi = 0xc0105044, ebp = 0xe000, eax = 0x78, xds = 0x18,
	 *       xes = 0x18, orig_eax = 0x78, eip = 0xc0105ac7, xcs = 0x10,
	 *       eflags = 0x206, esp = 0xc0105044, xss = 0x400}
	 */
	clone_flags = regs.ebx;	/* traced call: CLONE_FS|CLONE_FILES|CLONE_SIGNAL|CLONE_VM */
	newsp = regs.ecx;	/* traced call: 0x78 */
	if (!newsp)
		newsp = regs.esp;
	return do_fork(clone_flags, newsp, &regs, 0);
}
-
int do_fork(unsigned long clone_flags, unsigned long stack_start,
-
struct pt_regs *regs, unsigned long stack_size)
-
{
-
int retval;
-
struct task_struct *p;
-
struct completion vfork;
-
-
retval = -EPERM;
-
-
/*
-
* CLONE_PID is only allowed for the initial SMP swapper
-
* calls
-
*/
-
if (clone_flags & CLONE_PID) {
-
if (current->pid)
-
goto fork_out;
-
}
-
-
retval = -ENOMEM;
-
//alloc_task_struct=__get_free_pages(GFP_KERNEL,1),最后的参数1,代表要分配2页内存
-
p = alloc_task_struct();
-
if (!p)
-
goto fork_out;
-
-
*p = *current;
-
-
retval = -EAGAIN;
-
/*
-
* Check if we are over our maximum process limit, but be sure to
-
* exclude root. This is needed to make it possible for login and
-
* friends to set the per-user process limit to something lower
-
* than the amount of processes root is running. -- Rik
-
*/
-
if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur
-
&& !capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE))
-
goto bad_fork_free;
-
-
atomic_inc(&p->user->__count);
-
atomic_inc(&p->user->processes);
-
-
/*
-
* Counter increases are protected by
-
* the kernel lock so nr_threads can't
-
* increase under us (but it may decrease).
-
*/
-
if (nr_threads >= max_threads)
-
goto bad_fork_cleanup_count;
-
-
get_exec_domain(p->exec_domain);
-
-
if (p->binfmt && p->binfmt->module)
-
__MOD_INC_USE_COUNT(p->binfmt->module);
-
-
p->did_exec = 0;
-
p->swappable = 0;
-
p->state = TASK_UNINTERRUPTIBLE;
-
-
copy_flags(clone_flags, p);
-
p->pid = get_pid(clone_flags);
-
-
p->run_list.next = NULL;
-
p->run_list.prev = NULL;
-
-
p->p_cptr = NULL;
-
init_waitqueue_head(&p->wait_chldexit);
-
p->vfork_done = NULL;
-
if (clone_flags & CLONE_VFORK) {
-
p->vfork_done = &vfork;
-
init_completion(&vfork);
-
}
-
spin_lock_init(&p->alloc_lock);
-
-
p->sigpending = 0;
-
init_sigpending(&p->pending);
-
-
p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
-
p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
-
init_timer(&p->real_timer);
-
p->real_timer.data = (unsigned long) p;
-
-
p->leader = 0; /* session leadership doesn't inherit */
-
p->tty_old_pgrp = 0;
-
p->times.tms_utime = p->times.tms_stime = 0;
-
p->times.tms_cutime = p->times.tms_cstime = 0;
-
#ifdef CONFIG_SMP
-
{
-
int i;
-
p->cpus_runnable = ~0UL;
-
p->processor = current->processor;
-
/* ?? should we just memset this ?? */
-
for(i = 0; i < smp_num_cpus; i++)
-
p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
-
spin_lock_init(&p->sigmask_lock);
-
}
-
#endif
-
p->lock_depth = -1; /* -1 = no lock */
-
p->start_time = jiffies;
-
-
INIT_LIST_HEAD(&p->local_pages);
-
-
retval = -ENOMEM;
-
/* copy all the process information */
-
if (copy_files(clone_flags, p)) //因为这儿有了CLONE_FILES标志,所以不会真的去分配files_struct,只是oldf->count++
-
goto bad_fork_cleanup;
-
if (copy_fs(clone_flags, p)) //因为这儿有了CLONE_FS标志,所以不会真的去分配fs_struct,只是current->fs->count++
-
goto bad_fork_cleanup_files;
-
if (copy_sighand(clone_flags, p)) //因为这儿有了CLONE_SINGNAL标志,所以不会真的去分配fs_struct,只是current->fs->count++
-
goto bad_fork_cleanup_fs;
-
if (copy_mm(clone_flags, p)) //因为这儿有了CLONE_VM标志,所以不会真的去分配mm_struct,只是oldmm->mm_users++
-
goto bad_fork_cleanup_sighand;
-
retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
-
if (retval)
-
goto bad_fork_cleanup_mm;
-
p->semundo = NULL;
-
-
/* Our parent execution domain becomes current domain
-
These must match for thread signalling to apply */
-
-
p->parent_exec_id = p->self_exec_id;
-
-
/* ok, now we should be set up.. */
-
p->swappable = 1;
-
p->exit_signal = clone_flags & CSIGNAL;
-
p->pdeath_signal = 0;
-
-
/*
-
* "share" dynamic priority between parent and child, thus the
-
* total amount of dynamic priorities in the system doesnt change,
-
* more scheduling fairness. This is only important in the first
-
* timeslice, on the long run the scheduling behaviour is unchanged.
-
*/
-
p->counter = (current->counter + 1) >> 1;
-
current->counter >>= 1;
-
if (!current->counter)
-
current->need_resched = 1;
-
-
/*
-
* Ok, add it to the run-queues and make it
-
* visible to the rest of the system.
-
*
-
* Let it
-
*/
-
retval = p->pid;
-
p->tgid = retval;
-
INIT_LIST_HEAD(&p->thread_group);
-
-
/* Need tasklist lock for parent etc */
-
write_lock_irq(&tasklist_lock);
-
-
/* CLONE_PARENT and CLONE_THREAD re-use the old parent */
-
p->p_opptr = current->p_opptr;
-
p->p_pptr = current->p_pptr;
-
if (!(clone_flags & (CLONE_PARENT | CLONE_THREAD))) {
-
p->p_opptr = current;
-
if (!(p->ptrace & PT_PTRACED))
-
p->p_pptr = current;
-
}
-
-
if (clone_flags & CLONE_THREAD) {
-
p->tgid = current->tgid;
-
list_add(&p->thread_group, ¤t->thread_group);
-
}
-
-
SET_LINKS(p);
-
hash_pid(p);
-
nr_threads++;
-
write_unlock_irq(&tasklist_lock);
-
-
if (p->ptrace & PT_PTRACED)
-
send_sig(SIGSTOP, p, 1);
-
-
wake_up_process(p); /* do this last */
-
++total_forks;
-
if (clone_flags & CLONE_VFORK)
-
wait_for_completion(&vfork);
-
-
fork_out:
-
return retval;
-
-
bad_fork_cleanup_mm:
-
exit_mm(p);
-
bad_fork_cleanup_sighand:
-
exit_sighand(p);
-
bad_fork_cleanup_fs:
-
exit_fs(p); /* blocking */
-
bad_fork_cleanup_files:
-
exit_files(p); /* blocking */
-
bad_fork_cleanup:
-
put_exec_domain(p->exec_domain);
-
if (p->binfmt && p->binfmt->module)
-
__MOD_DEC_USE_COUNT(p->binfmt->module);
-
bad_fork_cleanup_count:
-
atomic_dec(&p->user->processes);
-
free_uid(p->user);
-
bad_fork_free:
-
free_task_struct(p);
-
goto fork_out;
-
}
参数p=0xf7dee000,在do_fork中新分配的2页内存用作子进程的task_struct
-
int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
-
unsigned long unused, struct task_struct * p, struct pt_regs * regs)
-
{
-
struct pt_regs * childregs;
-
//这儿稍不留神就会看错-->是先转为struct pt_regs后再减1,意思是在栈顶留出一个struct pt_regs大小的空间
-
childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1;
-
//先将父进程的全部regs复制过来,然后再作调整
-
struct_cpy(childregs, regs); //即memcpy(childregs, regs, sizeof(*(regs)));
-
childregs->eax = 0; //child的eax=0,这也就是为什么fork后子进程返回0的原因
-
childregs->esp = esp; //这个esp是在sys_clone时的regs.ecx=0x78
-
-
p->thread.esp = (unsigned long) childregs; //执行后p->thread.esp=0xf7deffc4=(p-sizeof(pt_regs))
-
p->thread.esp0 = (unsigned long) (childregs+1); //执行后p->thread.esp0=0xf7df0000=(p),即将两页内存的最高端作为栈顶
-
-
p->thread.eip = (unsigned long) ret_from_fork; //下次调度时子进程执行的命令是ret_form_fork
-
-
savesegment(fs,p->thread.fs);
-
savesegment(gs,p->thread.gs);
-
-
unlazy_fpu(current);
-
struct_cpy(&p->thread.i387, ¤t->thread.i387);
-
-
return 0;
-
}
上图出自《linux内核情景分析》P306
阅读(1743) | 评论(0) | 转发(0) |