Chinaunix首页 | 论坛 | 博客
  • 博客访问: 585274
  • 博文数量: 146
  • 博客积分: 5251
  • 博客等级: 大校
  • 技术积分: 1767
  • 用 户 组: 普通用户
  • 注册时间: 2006-11-10 15:58
文章分类
文章存档

2010年(12)

2008年(129)

2007年(5)

我的朋友

分类: LINUX

2008-11-11 16:55:47

四:a.out文件格式的加载
a.out格式的处理代码位于binfmt_aout.c中,如下所示:
/*
 * Binary-format descriptor for a.out. It names the handlers for loading
 * an executable, loading a shared library and writing a core dump.
 */
static struct linux_binfmt aout_format = {
     .module       = THIS_MODULE,
     /* Entry point used to load an a.out executable. */
     .load_binary  = load_aout_binary,
     .load_shlib   = load_aout_library,
     .core_dump    = aout_core_dump,
     /* NOTE(review): presumably the smallest core dump worth writing - confirm. */
     .min_coredump = PAGE_SIZE
};
对应的加载接口为load_aout_binary().代码如下:
static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
{
     struct exec ex;
     unsigned long error;
     unsigned long fd_offset;
     unsigned long rlim;
     int retval;
 
     //文件头信息匹配
     ex = *((struct exec *) bprm->buf);        /* exec-header */
     if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
          N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
         N_TRSIZE(ex) || N_DRSIZE(ex) ||
         i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
         return -ENOEXEC;
     }
 
     /*
      * Requires a mmap handler. This prevents people from using a.out
      * as part of an exploit attack against /proc-related vulnerabilities.
      */
      //如果文件不支持OPEN,或者MMAP。无效
     if (!bprm->file->f_op || !bprm->file->f_op->mmap)
         return -ENOEXEC;
 
     //可执行文件正文的起始位置
     //每种类型的正文起始位置
     fd_offset = N_TXTOFF(ex);
 
     /* Check initial limits. This avoids letting people circumvent
      * size limits imposed on them by creating programs with large
      * arrays in the data or bss.
      */
 
     //判断data+bss是否超过了限制
     rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
     if (rlim >= RLIM_INFINITY)
         rlim = ~0;
     if (ex.a_data + ex.a_bss > rlim)
         return -ENOMEM;
 
     /* Flush all traces of the currently running executable */
     //已经取得了足够的信息,是跟当前进程脱离的时候了
     retval = flush_old_exec(bprm);
     if (retval)
         return retval;
 
     /* OK, This is the point of no return */
#if defined(__alpha__)
     SET_AOUT_PERSONALITY(bprm, ex);
#elif defined(__sparc__)
     set_personality(PER_SUNOS);
#if !defined(__sparc_v9__)
     memcpy(¤t->thread.core_exec, &ex, sizeof(struct exec));
#endif
#else
     //设置进程的个性标志
     set_personality(PER_LINUX);
#endif
 
     //设置进程的代码段的起始与终止位置
     current->mm->end_code = ex.a_text +
         (current->mm->start_code = N_TXTADDR(ex));
     //设置进程数段段的起始与终止位置
     current->mm->end_data = ex.a_data +
         (current->mm->start_data = N_DATADDR(ex));
     //设置进程BSS区间
     current->mm->brk = ex.a_bss +
         (current->mm->start_brk = N_BSSADDR(ex));
    
     current->mm->free_area_cache = current->mm->mmap_base;
     current->mm->cached_hole_size = 0;
 
     compute_creds(bprm);
     //进程已经fork 完成了,不再需要PF_FORKNOEXEC
     current->flags &= ~PF_FORKNOEXEC;
#ifdef __sparc__
     if (N_MAGIC(ex) == NMAGIC) {
         loff_t pos = fd_offset;
         /* Fuck me plenty... */
         /* */
         down_write(¤t->mm->mmap_sem); 
         error = do_brk(N_TXTADDR(ex), ex.a_text);
         up_write(¤t->mm->mmap_sem);
         bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex),
                ex.a_text, &pos);
         down_write(¤t->mm->mmap_sem);
         error = do_brk(N_DATADDR(ex), ex.a_data);
         up_write(¤t->mm->mmap_sem);
         bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex),
                ex.a_data, &pos);
         goto beyond_if;
     }
#endif
 
     //如果是OMAGIC格式
     if (N_MAGIC(ex) == OMAGIC) {
         unsigned long text_addr, map_size;
         loff_t pos;
 
         text_addr = N_TXTADDR(ex);
 
#if defined(__alpha__) || defined(__sparc__)
         pos = fd_offset;
         map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1;
#else
         pos = 32;
         map_size = ex.a_text+ex.a_data;
#endif
         down_write(¤t->mm->mmap_sem);
         //为进程的代码段分配空间
         error = do_brk(text_addr & PAGE_MASK, map_size);
         up_write(¤t->mm->mmap_sem);
         if (error != (text_addr & PAGE_MASK)) {
              send_sig(SIGKILL, current, 0);
              return error;
         }
 
         //读文件数据读入代码段
         error = bprm->file->f_op->read(bprm->file,
                (char __user *)text_addr,
                ex.a_text+ex.a_data, &pos);
         if ((signed long)error < 0) {
              send_sig(SIGKILL, current, 0);
              return error;
         }
 
         //x86上为一空函数
         flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
     } else {
         static unsigned long error_time, error_time2;
 
         //数据段,代码段是否页框对齐
         if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
             (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
         {
              printk(KERN_NOTICE "executable not page aligned\n");
              error_time2 = jiffies;
         }
 
         //
         if ((fd_offset & ~PAGE_MASK) != 0 &&
             (jiffies-error_time) > 5*HZ)
         {
              printk(KERN_WARNING
                     "fd_offset is not page aligned. Please convert program: %s\n",
                     bprm->file->f_path.dentry->d_name.name);
              error_time = jiffies;
         }
 
         if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
              //不支持mmap
              loff_t pos = fd_offset;
              down_write(¤t->mm->mmap_sem);
              //分配段空间
              do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
              up_write(¤t->mm->mmap_sem);
              //从文件中读入相关数据
              bprm->file->f_op->read(bprm->file,
                       (char __user *)N_TXTADDR(ex),
                       ex.a_text+ex.a_data, &pos);
              flush_icache_range((unsigned long) N_TXTADDR(ex),
                          (unsigned long) N_TXTADDR(ex) +
                          ex.a_text+ex.a_data);
              goto beyond_if;
         }
 
         //如果支持MMAP。将直接将文件映射到内存即可
         down_write(¤t->mm->mmap_sem);
         error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
              PROT_READ | PROT_EXEC,
              MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
              fd_offset);
         up_write(¤t->mm->mmap_sem);
 
         if (error != N_TXTADDR(ex)) {
              send_sig(SIGKILL, current, 0);
              return error;
         }
 
         down_write(¤t->mm->mmap_sem);
         error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
                   PROT_READ | PROT_WRITE | PROT_EXEC,
                   MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
                   fd_offset + ex.a_text);
         up_write(¤t->mm->mmap_sem);
         if (error != N_DATADDR(ex)) {
              send_sig(SIGKILL, current, 0);
              return error;
         }
     }
beyond_if:
     //设置进程的binfmt
     set_binfmt(&aout_format);
 
     //为BSS段分配空间
     retval = set_brk(current->mm->start_brk, current->mm->brk);
     if (retval < 0) {
         //分配失败,发送SIGKILL信号,杀掉当前进程
         send_sig(SIGKILL, current, 0);
         return retval;
     }
 
     //扩大进程的栈
     retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
     if (retval < 0) {
         /* Someone check-me: is this error path enough? */
         send_sig(SIGKILL, current, 0);
         return retval;
     }
 
     //调整栈空间的布局
     current->mm->start_stack =
         (unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
#ifdef __alpha__
     regs->gp = ex.a_gpvalue;
#endif
     //设置新的EIP与ESP.使其返回到用户空间后,可以开始运行这个程序
     start_thread(regs, ex.a_entry, current->mm->start_stack);
     if (unlikely(current->ptrace & PT_PTRACED)) {
         if (current->ptrace & PT_TRACE_EXEC)
              ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
         else
              send_sig(SIGTRAP, current, 0);
     }
     return 0;
}
首先判断文件的文件头信息,检查是否属于a.out文件.如果不属于a.out格式,则出错退出,让其它module进行选择.
因为execve()系统调用会完全代替进程,因此,在运行该进程之前,先解除父子进程的共享关系,这是由flush_old_exec()完成的.代码如下:
int flush_old_exec(struct linux_binprm * bprm)
{
     char * name;
     int i, ch, retval;
     struct files_struct *files;
     char tcomm[sizeof(current->comm)];
 
     //如果父子进程共享信号处理,脱离其共享关系
     retval = de_thread(current);
     if (retval)
         goto out;
 
      //复制共享的文件
     files = current->files;     /* refcounted so safe to hold */
     retval = unshare_files();
     if (retval)
         goto out;
    
     //进程的用户空间有可能是父进程的复制品.使之独立
 
     //使进程的mm切换为bprm->mm
     //这就是我们之前千亲万苦初始化bprm->mm的原因
     retval = exec_mmap(bprm->mm);
     if (retval)
         goto mmap_failed;
 
     bprm->mm = NULL;       /* We're using it now */
 
     put_files_struct(files);
 
     current->sas_ss_sp = current->sas_ss_size = 0;
 
     if (current->euid == current->uid && current->egid == current->gid)
         set_dumpable(current->mm, 1);
     else
         set_dumpable(current->mm, suid_dumpable);
 
     name = bprm->filename;
 
     /* Copies the binary name from after last slash */
     //取可执行文件的名字
     for (i=0; (ch = *(name++)) != '\0';) {
         if (ch == '/')
              i = 0; /* overwrite what we wrote */
         else
              if (i < (sizeof(tcomm) - 1))
                   tcomm[i++] = ch;
     }
     tcomm[i] = '\0';
     //task->com:保存可执行文件名
     set_task_comm(current, tcomm);
 
     current->flags &= ~PF_RANDOMIZE;
     //flush_thread:只与协处理器和DEBUG有关
     flush_thread();
 
     current->mm->task_size = TASK_SIZE;
 
     if (bprm->e_uid != current->euid || bprm->e_gid != current->egid) {
         suid_keys(current);
         set_dumpable(current->mm, suid_dumpable);
         current->pdeath_signal = 0;
     } else if (file_permission(bprm->file, MAY_READ) ||
              (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
         suid_keys(current);
         set_dumpable(current->mm, suid_dumpable);
     }
 
     /* An exec changes our domain. We are no longer part of the thread
        group */
 
     current->self_exec_id++;
     //因为解除了跟父进程的共享关系,所以
     //将信号处理函数改为默认的操作
     flush_signal_handlers(current, 0);
     //关闭打开的文件
     flush_old_files(current->files);
 
     return 0;
 
mmap_failed:
     reset_files_struct(current, files);
out:
     return retval;
}
我们重点分析一下exec_mmap():
/*
 * exec_mmap - make @mm the address space of the current task.
 * @mm: the new mm to install as both tsk->mm and tsk->active_mm.
 *
 * Returns 0 on success, or -EINTR when the old mm has core_waiters set
 * (in which case nothing is switched).
 */
static int exec_mmap(struct mm_struct *mm)
{
     struct task_struct *tsk;
     struct mm_struct * old_mm, *active_mm;

     tsk = current;
     old_mm = current->mm;
     /* Runs the mm_release() handling for the old mm before switching. */
     mm_release(tsk, old_mm);

     if (old_mm) {
         /*
          * Hold the old mm's mmap_sem for reading across the switch, and
          * bail out if core_waiters is set on it.
          */
         down_read(&old_mm->mmap_sem);
         if (unlikely(old_mm->core_waiters)) {
              up_read(&old_mm->mmap_sem);
              return -EINTR;
         }
     }
     task_lock(tsk);
     active_mm = tsk->active_mm;
     tsk->mm = mm;
     tsk->active_mm = mm;
     /* Switch from the previously active mm to the new one. */
     activate_mm(active_mm, mm);
     task_unlock(tsk);
     arch_pick_mmap_layout(mm);

     /*
      * Drop the reference that is no longer needed: mmput() on old_mm when
      * there was one, otherwise mmdrop() on the borrowed active_mm.
      */
     if (old_mm) {
         up_read(&old_mm->mmap_sem);
         BUG_ON(active_mm != old_mm);
         mmput(old_mm);
         return 0;
     }
     mmdrop(active_mm);
     return 0;
}
值得注意的是mm_release()中有一个重要的操作:
void mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
     struct completion *vfork_done = tsk->vfork_done;
 
     /* Get rid of any cached register state */
     deactivate_mm(tsk, mm);
 
     /* notify parent sleeping on vfork() */
     //如果创建子进程的时候带了CLONE_VFORK。其在子进程已经使用完了
     //是该唤醒父进程了
     if (vfork_done) {
         tsk->vfork_done = NULL;
         complete(vfork_done);
     }
 
     /*
      * If we're exiting normally, clear a user-space tid field if
      * requested.  We leave this alone when dying by signal, to leave
      * the value intact in a core dump, and to save the unnecessary
      * trouble otherwise.  Userland only wants this done for a sys_exit.
      */
     if (tsk->clear_child_tid
         && !(tsk->flags & PF_SIGNALED)
         && atomic_read(&mm->mm_users) > 1) {
         u32 __user * tidptr = tsk->clear_child_tid;
         tsk->clear_child_tid = NULL;
 
         /*
          * We don't check the error code - if userspace has
          * not set up a proper pointer then tough luck.
          */
         put_user(0, tidptr);
         sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
     }
}
还记得我们之前讨论过的CLONE_VFORK标志吗?到这里就可以唤醒父进程了.因为此时子进程结束了对父进程空间的共享.
与父进程脱离关系之后,子进程就拥有了自己独立的资源.然后加载数据段和代码段.分配BSS段空间.把栈空间也伸缩适当大小.
之后我们遇到的再一个重点是栈空间的布局.我们来分析这一个过程.
static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
{
     ……
     ……
     current->mm->start_stack =
         (unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
#ifdef __alpha__
     regs->gp = ex.a_gpvalue;
#endif
     start_thread(regs, ex.a_entry, current->mm->start_stack);
     ……
}
create_aout_tables()代码如下:
/*
 * create_aout_tables - build the argc/argv/envp tables on the user stack.
 * @p:    user-space address of the first argument string (the caller
 *        passes bprm->p).
 * @bprm: binary parameters; supplies argc and envc (and, on alpha, the
 *        loader and exec values).
 *
 * Reserves room below @p for the envp and argv pointer arrays, stores argc
 * beneath them, then walks the strings starting at @p and fills in both
 * arrays, NULL-terminating each. Also records arg_start/arg_end and
 * env_start/env_end in current->mm.
 *
 * Returns the resulting stack pointer, which points at the stored argc.
 * The results of put_user()/get_user() are not checked.
 */
static unsigned long __user *create_aout_tables(char __user *p, struct linux_binprm * bprm)
{
    char __user * __user *argv;
    char __user * __user *envp;
    unsigned long __user *sp;
    /* Number of arguments of the executable. */
    int argc = bprm->argc;
    /* Number of environment variables. */
    int envc = bprm->envc;
 
    /* sp starts at p, with the low bits masked off to a sizeof(char *) boundary. */
/* Corresponds to the initial state (1) in the article's figure. */
    sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p);
#ifdef __sparc__
    /* This imposes the proper stack alignment for a new process. */
    sp = (void __user *) (((unsigned long) sp) & ~7);
    if ((envc+argc+3)&1) --sp;
#endif
#ifdef __alpha__
/* whee.. test-programs are so much fun. */
    put_user(0, --sp);
    put_user(0, --sp);
    if (bprm->loader) {
        put_user(0, --sp);
        put_user(0x3eb, --sp);
        put_user(bprm->loader, --sp);
        put_user(0x3ea, --sp);
    }
    put_user(bprm->exec, --sp);
    put_user(0x3e9, --sp);
#endif
    /* Reserve envc+1 slots for envp[] and argc+1 slots for argv[] (incl. NULL). */
    sp -= envc+1;
    envp = (char __user * __user *) sp;
    sp -= argc+1;
    argv = (char __user * __user *) sp;
#if defined(__i386__) || defined(__mc68000__) || defined(__arm__) || defined(__arch_um__)
    /* On these architectures the envp and argv addresses are pushed as well. */
    put_user((unsigned long) envp,--sp);
    put_user((unsigned long) argv,--sp);
#endif
    put_user(argc,--sp);
    /* Corresponds to state (2) in the article's figure. */
    current->mm->arg_start = (unsigned long) p;
   
    /* Store each argument's address in argv[], then skip past its NUL terminator. */
    while (argc-->0) {
        char c;
        put_user(p,argv++);
        do {
            get_user(c,p++);
        } while (c);
    }
    put_user(NULL,argv);
    current->mm->arg_end = current->mm->env_start = (unsigned long) p;
    /* Same walk for the environment strings, filling envp[]. */
    while (envc-->0) {
        char c;
        put_user(p,envp++);
        do {
            get_user(c,p++);
        } while (c);
    }
    put_user(NULL,envp);
    current->mm->env_end = (unsigned long) p;
    /* Corresponds to state (3) in the article's figure. */
    return sp;
}
我们用图来表示上面的操作过程:
 
 
 
对照上面的分析图就很容易看懂代码了.
最后,设置eip的值为可执行文件的入口地址(即文件头中的ex.a_entry),esp为当前栈指针位置(current->mm->start_stack),返回到用户空间就可以顺利的执行了.这一过程是start_thread()完成的.
阅读(1674) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~