参照《操作系统:设计与实现》。文章中大部分文字和图片来源于此书。
书中以在shell终端输入命令创建进程为例来讲解EXEC系统调用。它完成的工作如下:
1.检查权限---文件是否可执行?
2.读取文件头得到各段长度和总长度。
3.从调用者处取参数和环境。
4.分配新内存和释放旧内存。
5.把堆栈复制到新的内存映像中。
6.把数据段(可能还有正文段)复制到新的内存映像中。
7.检查并处理setuid、setgid位。
8.设置进程表项。
9.告诉内核进程现在是可运行的。
例:
在shell终端输入如下命令。
ls -l f.c g.c
敲入回车键后,上述命令以字符串的形式传递给shell解释程序,由shell对其进行解释。在解释的过程中将调用库过程execve("/bin/ls", argv, envp)。其中argv和envp是shell解释程序根据输入的命令行重新构造的两个数组(如下图中的a所示);库过程execve则会据此重新构造参数堆栈。如下图所示:
该库过程最终构造好main函数运行时需要的栈结构,从而进入用户函数main。但在进入用户函数main之前,除了构造栈之外还有很多工作要做,这些工作在/src/mm/exec.c文件中的do_exec()函数中完成。此函数完成了文章最开始列出的九项工作,其中最主要的工作是为进程分配新的内存,并将可执行文件加载进内存。所谓加载,就是将可执行文件(MINIX使用a.out格式,而不是ELF格式)中的正文段、数据段等二进制数据拷贝到新分配的内存中。代码如下:
PUBLIC int do_exec()
{
/* Perform the execve(name, argv, envp) call. The user library builds a
* complete stack image, including pointers, args, environ, etc. The stack
* is copied to a buffer inside MM, and then to the new core image.
*/
register struct mproc *rmp;
struct mproc *sh_mp;
int m, r, fd, ft, sn;
static char mbuf[ARG_MAX]; /* buffer for stack and zeroes */
static char name_buf[PATH_MAX]; /* the name of the file to exec */
char *new_sp, *basename;
vir_bytes src, dst, text_bytes, data_bytes, bss_bytes, stk_bytes, vsp;
phys_bytes tot_bytes; /* total space for program, including gap */
long sym_bytes;
vir_clicks sc;
struct stat s_buf;
vir_bytes pc;
/* Do some validity checks. */
rmp = mp;
stk_bytes = (vir_bytes) stack_bytes;
if (stk_bytes > ARG_MAX) return(ENOMEM); /* stack too big */
if (exec_len <= 0 || exec_len > PATH_MAX) return(EINVAL);
/* Get the exec file name and see if the file is executable. */
src = (vir_bytes) exec_name;
dst = (vir_bytes) name_buf;
r = sys_copy(who, D, (phys_bytes) src,
MM_PROC_NR, D, (phys_bytes) dst, (phys_bytes) exec_len);
if (r != OK) return(r); /* file name not in user data segment */
tell_fs(CHDIR, who, FALSE, 0); /* switch to the user's FS environ. */
fd = allowed(name_buf, &s_buf, X_BIT); /* is file executable? */
if (fd < 0) return(fd); /* file was not executable */
/* Read the file header and extract the segment sizes. */
sc = (stk_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT;/*每次至少为CLICK_SHIFT=4096字节*/
m = read_header(fd, &ft, &text_bytes, &data_bytes, &bss_bytes,
&tot_bytes, &sym_bytes, sc, &pc);
if (m < 0) {
close(fd); /* something wrong with header */
return(ENOEXEC);
}
/* Fetch the stack from the user before destroying the old core image.
* 将老的内存中的堆栈中的数据先拷贝保存起来。
*/
src = (vir_bytes) stack_ptr;
dst = (vir_bytes) mbuf;
r = sys_copy(who, D, (phys_bytes) src,
MM_PROC_NR, D, (phys_bytes) dst, (phys_bytes)stk_bytes);
if (r != OK) {
close(fd); /* can't fetch stack (e.g. bad virtual addr) */
return(EACCES);
}
/* Can the process' text be shared with that of one already running?
* 当前要加载的可执行文件在内存中是否已存在?
*/
sh_mp = find_share(rmp, s_buf.st_ino, s_buf.st_dev, s_buf.st_ctime);
/* Allocate new memory and release old memory. Fix map and tell kernel.
* 为进程分配新内存。
*/
r = new_mem(sh_mp, text_bytes, data_bytes, bss_bytes, stk_bytes, tot_bytes);
if (r != OK) {
close(fd); /* insufficient core or program too big */
return(r);
}
/* Save file identification to allow it to be shared. */
rmp->mp_ino = s_buf.st_ino;
rmp->mp_dev = s_buf.st_dev;
rmp->mp_ctime = s_buf.st_ctime;
/* Patch up stack and copy it from MM to new core image. */
vsp = (vir_bytes) rmp->mp_seg[S].mem_vir << CLICK_SHIFT;
vsp += (vir_bytes) rmp->mp_seg[S].mem_len << CLICK_SHIFT;
vsp -= stk_bytes; /*取到新内存堆栈的起始地址,并将老的堆栈数据拷贝过来*/
patch_ptr(mbuf, vsp);
src = (vir_bytes) mbuf;
r = sys_copy(MM_PROC_NR, D, (phys_bytes) src,
who, D, (phys_bytes) vsp, (phys_bytes)stk_bytes);
if (r != OK) panic("do_exec stack copy err", NO_NUM);
/* Read in text and data segments.调用lseek函数读可执行文件,
* 并调用 load_seg()将数据段和正文段读入内存。
*/
if (sh_mp != NULL) {
lseek(fd, (off_t) text_bytes, SEEK_CUR); /* shared: skip text */
} else {
load_seg(fd, T, text_bytes);
}
load_seg(fd, D, data_bytes);
#if (SHADOWING == 1)
if (lseek(fd, (off_t)sym_bytes, SEEK_CUR) == (off_t) -1) ; /* error */
if (relocate(fd, (unsigned char *)mbuf) < 0) ; /* error */
pc += (vir_bytes) rp->mp_seg[T].mem_vir << CLICK_SHIFT;
#endif
close(fd); /* don't need exec file any more */
/* Take care of setuid/setgid bits. */
if ((rmp->mp_flags & TRACED) == 0) { /* suppress if tracing */
if (s_buf.st_mode & I_SET_UID_BIT) {
rmp->mp_effuid = s_buf.st_uid;
tell_fs(SETUID,who, (int)rmp->mp_realuid, (int)rmp->mp_effuid);
}
if (s_buf.st_mode & I_SET_GID_BIT) {
rmp->mp_effgid = s_buf.st_gid;
tell_fs(SETGID,who, (int)rmp->mp_realgid, (int)rmp->mp_effgid);
}
}
/* Save offset to initial argc (for ps) */
rmp->mp_procargs = vsp;
/* Fix 'mproc' fields, tell kernel that exec is done, reset caught sigs. */
for (sn = 1; sn <= _NSIG; sn++) {
if (sigismember(&rmp->mp_catch, sn)) {
sigdelset(&rmp->mp_catch, sn);
rmp->mp_sigact[sn].sa_handler = SIG_DFL;
sigemptyset(&rmp->mp_sigact[sn].sa_mask);
}
}
rmp->mp_flags &= ~SEPARATE; /* turn off SEPARATE bit */
rmp->mp_flags |= ft; /* turn it on for separate I & D files */
new_sp = (char *) vsp;
tell_fs(EXEC, who, 0, 0); /* allow FS to handle FD_CLOEXEC files */
/* System will save command line for debugging, ps(1) output, etc. */
basename = strrchr(name_buf, '/');
if (basename == NULL) basename = name_buf; else basename++;
sys_exec(who, new_sp, rmp->mp_flags & TRACED, basename, pc);
return(OK);
}