------------------------------------------
本文系本站原创,欢迎转载!
转载请注明出处:http://ericxiao.cublog.cn/
------------------------------------------
在进程运行结束后,我们会显式地调用exit()或者用return退出正在运行的进程;如果使用return,编译器会自动加上exit()。此时,保存子进程的一部分信息是很有必要的,因为父进程可以读取这些信息来取得子进程的退出状态。如果子进程已经退出,但父进程没有调用wait(),子进程就成为了我们常说的僵尸进程。exit()系统调用在内核中的相应接口为sys_exit(),它最终会调用do_exit()。我们来跟踪一下,看内核是如何处理这个过程的:
/*
 * do_exit - terminate the calling task.
 * @code: exit status; stored in tsk->exit_code, handed to the accounting
 *        helpers and, when PT_TRACE_EXIT is set, reported to the tracer.
 *
 * Tears down the resources of `current` subsystem by subsystem, notifies
 * the parent through exit_notify(), marks the task TASK_DEAD and calls
 * schedule(). The function does not return: anything after the final
 * schedule() is a BUG().
 */
fastcall NORET_TYPE void do_exit(long code)
{
struct task_struct *tsk = current;
int group_dead;
// Profiling hook for task exit.
// NOTE(review): presumably compiled out when profiling is not configured - confirm.
profile_task_exit(tsk);
WARN_ON(atomic_read(&tsk->fs_excl));
// Must not be called from interrupt context, nor by the task with pid 0
// (the idle task): both cases panic.
if (unlikely(in_interrupt()))
panic("Aiee, killing interrupt handler!");
if (unlikely(!tsk->pid))
panic("Attempted to kill the idle task!");
// If PT_TRACE_EXIT is set, publish the exit code to the tracer and raise
// the exit event before anything is torn down.
if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
current->ptrace_message = code;
ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
}
/*
* We're taking recursive faults here in do_exit. Safest is to just
* leave this task alone and wait for reboot.
*/
// PF_EXITING already set means a previous pass through do_exit() did not
// complete. The safest option is to park this task in an uninterruptible
// sleep and wait for the machine to be rebooted.
if (unlikely(tsk->flags & PF_EXITING)) {
printk(KERN_ALERT
"Fixing recursive fault but reboot is needed!\n");
/*
* We can do this unlocked here. The futex code uses
* this flag just to verify whether the pi state
* cleanup has been done or not. In the worst case it
* loops once more. We pretend that the cleanup was
* done as there is no way to return. Either the
* OWNER_DIED bit is set by now or we push the blocked
* task into the wait for ever nirwana as well.
*/
tsk->flags |= PF_EXITPIDONE;
if (tsk->io_context)
exit_io_context();
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
}
// Set PF_EXITING: the task is now on its way out.
tsk->flags |= PF_EXITING;
/*
* tsk->flags are checked in the futex code to protect against
* an exiting task cleaning up the robust pi futexes.
*/
smp_mb();
spin_unlock_wait(&tsk->pi_lock);
if (unlikely(in_atomic()))
printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
current->comm, task_pid_nr(current),
preempt_count());
acct_update_integrals(tsk);
if (tsk->mm) {
update_hiwater_rss(tsk->mm);
update_hiwater_vm(tsk->mm);
}
// tsk->signal->live is a counter shared through the signal struct.
// NOTE(review): presumably the number of live threads in the group - confirm.
// atomic_dec_and_test() is true when the decremented value reaches zero,
// i.e. this task is the last one; group-wide cleanup then runs here.
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
exit_child_reaper(tsk);
hrtimer_cancel(&tsk->signal->real_timer);
exit_itimers(tsk->signal);
}
acct_collect(code, group_dead);
#ifdef CONFIG_FUTEX
if (unlikely(tsk->robust_list))
exit_robust_list(tsk);
#ifdef CONFIG_COMPAT
if (unlikely(tsk->compat_robust_list))
compat_exit_robust_list(tsk);
#endif
#endif
if (group_dead)
tty_audit_exit();
if (unlikely(tsk->audit_context))
audit_free(tsk);
// Record the exit status.
tsk->exit_code = code;
taskstats_exit(tsk, group_dead);
// Give up the address space used by the task.
exit_mm(tsk);
if (group_dead)
acct_process();
// NOTE(review): presumably detaches the task from IPC semaphore state
// (helper not shown) - confirm.
exit_sem(tsk);
// NOTE(review): presumably releases the open-file table, closing the
// files this task still has open (helper not shown) - confirm.
__exit_files(tsk);
// NOTE(review): presumably releases the task's filesystem context
// (helper not shown) - confirm.
__exit_fs(tsk);
check_stack_usage();
exit_thread();
cgroup_exit(tsk, 1);
exit_keys(tsk);
// Whole group is gone and tsk->signal->leader is set.
// NOTE(review): ->leader presumably marks a session leader - confirm.
if (group_dead && tsk->signal->leader)
// Detach from the controlling tty; per the author's note this also sends
// SIGHUP and SIGCONT to the terminal's process group (helper not shown).
disassociate_ctty(1);
// Drop the module references held for the exec domain and binary format.
module_put(task_thread_info(tsk)->exec_domain->module);
if (tsk->binfmt)
module_put(tsk->binfmt->module);
proc_exit_connector(tsk);
// Update family relationships and send the exit signal to the parent.
exit_notify(tsk);
#ifdef CONFIG_NUMA
mpol_free(tsk->mempolicy);
tsk->mempolicy = NULL;
#endif
#ifdef CONFIG_FUTEX
/*
* This must happen late, after the PID is not
* hashed anymore:
*/
if (unlikely(!list_empty(&tsk->pi_state_list)))
exit_pi_state_list(tsk);
if (unlikely(current->pi_state_cache))
kfree(current->pi_state_cache);
#endif
/*
* Make sure we are holding no locks:
*/
// Debugging check.
debug_check_no_locks_held(tsk);
/*
* We can do this unlocked here. The futex code uses this flag
* just to verify whether the pi state cleanup has been done
* or not. In the worst case it loops once more.
*/
// PI state cleanup is finished: set PF_EXITPIDONE.
tsk->flags |= PF_EXITPIDONE;
if (tsk->io_context)
exit_io_context();
if (tsk->splice_pipe)
__free_pipe_info(tsk->splice_pipe);
preempt_disable();
/* causes final put_task_struct in finish_task_switch(). */
// Mark the task TASK_DEAD.
tsk->state = TASK_DEAD;
// Hand the CPU to another task; control must never come back here.
schedule();
BUG();
/* Avoid "noreturn function does return". */
for (;;)
cpu_relax(); /* For when BUG is null */
}
这个函数涉及到很多的子系统,我们暂时只分析跟内存相关的部分。do_exit()不能在中断上下文中使用,也不能在0号进程中使用,因为这个函数会释放进程所占用的空间。如果在中断上下文或者0号进程中释放进程的资源,将会造成灾难性的后果。
释放进程所占用的空间的操作是在exit_mm()中完成的.对应的代码如下所示:
/*
 * exit_mm - detach an exiting task from its user address space.
 * @tsk: the exiting task.
 *
 * Calls mm_release() unconditionally, then (if the task has an mm)
 * clears tsk->mm under task_lock() and drops the task's reference with
 * mmput(). A task without an mm returns right after mm_release().
 */
static void exit_mm(struct task_struct * tsk)
{
struct mm_struct *mm = tsk->mm;
mm_release(tsk, mm);
/* Nothing further to do for a task that has no mm. */
if (!mm)
return;
down_read(&mm->mmap_sem);
/*
 * core_waiters is non-zero.
 * NOTE(review): presumably a core dump of this mm is in progress - confirm.
 * Swap the read lock for the write lock to decrement the waiter count;
 * whoever brings it to zero completes core_startup_done. Then block
 * until core_done is completed and re-take the read lock.
 */
if (mm->core_waiters) {
up_read(&mm->mmap_sem);
down_write(&mm->mmap_sem);
if (!--mm->core_waiters)
complete(mm->core_startup_done);
up_write(&mm->mmap_sem);
wait_for_completion(&mm->core_done);
down_read(&mm->mmap_sem);
}
/*
 * Take an extra mm_count reference before tsk->mm is cleared.
 * NOTE(review): presumably this keeps the mm_struct itself alive while
 * it is still referenced as tsk->active_mm - confirm.
 */
atomic_inc(&mm->mm_count);
BUG_ON(mm != tsk->active_mm);
/* more a memory barrier than a real lock */
task_lock(tsk);
tsk->mm = NULL;
up_read(&mm->mmap_sem);
enter_lazy_tlb(mm, current);
/* We don't want this task to be frozen prematurely */
clear_freeze_flag(tsk);
task_unlock(tsk);
/* Drop this task's user reference on the address space. */
mmput(mm);
}
mm_release()在之前已经分析过。它的作用是在定义了CLONE_VFORK的情况下唤醒父进程:因为此时子进程不再需要使用父进程的地址空间了,父进程可以自由地从系统调用中返回。另外一个重要的子函数是mmput(),它释放进程的VMA结构和所映射的页面。我们在之前内存管理部分已经分析过了,在这里不再赘述。
另外,在进程退出之前,要另找一个进程管理他的子进程,而且,进程在退出的时候,必须向其父进程发送相关信号。这个操作是在exit_notify()完成的。代码如下:
/*
 * exit_notify - settle family relationships and tell the parent we died.
 * @tsk: the exiting task.
 *
 * In order: wakes sibling threads if a group signal raced with our exit,
 * hands our children to a new parent (forget_original_parent()), sends
 * SIGHUP+SIGCONT to our process group if it becomes orphaned with stopped
 * jobs, notifies the parent, sets tsk->exit_state to EXIT_ZOMBIE or
 * EXIT_DEAD, and releases the task immediately in the EXIT_DEAD case.
 */
static void exit_notify(struct task_struct *tsk)
{
int state;
struct task_struct *t;
struct pid *pgrp;
if (signal_pending(tsk) && !(tsk->signal->flags & SIGNAL_GROUP_EXIT)
&& !thread_group_empty(tsk)) {
/*
* This occurs when there was a race between our exit
* syscall and a group signal choosing us as the one to
* wake up. It could be that we are the only thread
* alerted to check for pending signals, but another thread
* should be woken now to take the signal since we will not.
* Now we'll wake all the threads in the group just to make
* sure someone gets all the pending signals.
*/
spin_lock_irq(&tsk->sighand->siglock);
for (t = next_thread(tsk); t != tsk; t = next_thread(t))
if (!signal_pending(t) && !(t->flags & PF_EXITING))
recalc_sigpending_and_wake(t);
spin_unlock_irq(&tsk->sighand->siglock);
}
/*
* This does two things:
*
* A. Make init inherit all the child processes
* B. Check to see if any process groups have become orphaned
* as a result of our exiting, and if they have any stopped
* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
*/
// Re-home every child of this task, i.e. appoint a new parent for them.
forget_original_parent(tsk);
exit_task_namespaces(tsk);
write_lock_irq(&tasklist_lock);
/*
* Check to see if any process groups have become orphaned
* as a result of our exiting, and if they have any stopped
* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
*
* Case i: Our father is in a different pgrp than we are
* and we were the only connection outside, so our pgrp
* is about to become orphaned.
*/
// Check whether our exit orphans our own process group. Per POSIX
// 3.2.2.2, an orphaned group that contains stopped jobs gets SIGHUP
// followed by SIGCONT delivered to the whole group.
t = tsk->real_parent;
pgrp = task_pgrp(tsk);
if ((task_pgrp(t) != pgrp) &&
(task_session(t) == task_session(tsk)) &&
will_become_orphaned_pgrp(pgrp, tsk) &&
has_stopped_jobs(pgrp)) {
__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
}
/* Let father know we died
*
* Thread signals are configurable, but you aren't going to use
* that to send signals to arbitary processes.
* That stops right now.
*
* If the parent exec id doesn't match the exec id we saved
* when we started then we know the parent has changed security
* domain.
*
* If our self_exec id doesn't match our parent_exec_id then
* we have changed execution domain as these two values started
* the same after a fork.
*/
// An exit signal is configured (exit_signal != -1) but it is not SIGCHLD:
// if either exec-id check fails and we lack CAP_KILL, force the exit
// signal back to SIGCHLD.
if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 &&
( tsk->parent_exec_id != t->self_exec_id ||
tsk->self_exec_id != tsk->parent_exec_id)
&& !capable(CAP_KILL))
tsk->exit_signal = SIGCHLD;
/* If something other than our normal parent is ptracing us, then
* send it a SIGCHLD instead of honoring exit_signal. exit_signal
* only has special meaning to our real parent.
*/
// Notify the parent. When ->parent differs from ->real_parent (we are
// being traced by someone else), SIGCHLD is sent unconditionally.
if (tsk->exit_signal != -1 && thread_group_empty(tsk)) {
int signal = tsk->parent == tsk->real_parent ? tsk->exit_signal : SIGCHLD;
do_notify_parent(tsk, signal);
} else if (tsk->ptrace) {
do_notify_parent(tsk, SIGCHLD);
}
// Default to EXIT_ZOMBIE; switch to EXIT_DEAD only when no exit signal is
// configured (exit_signal == -1) and the task is not being ptraced.
state = EXIT_ZOMBIE;
if (tsk->exit_signal == -1 && likely(!tsk->ptrace))
state = EXIT_DEAD;
tsk->exit_state = state;
// If we are the thread-group leader, notify_count is negative and a
// group_exit_task is recorded, wake that task up.
if (thread_group_leader(tsk) &&
tsk->signal->notify_count < 0 &&
tsk->signal->group_exit_task)
wake_up_process(tsk->signal->group_exit_task);
write_unlock_irq(&tasklist_lock);
/* If the process is dead, release it - nobody will wait for it */
// EXIT_DEAD means the parent does not want our exit status, so release
// the task right away instead of leaving a zombie behind.
if (state == EXIT_DEAD)
release_task(tsk);
}
forget_original_parent()是其中比较重要的操作,重点分析一下:
/*
 * forget_original_parent - hand the children of an exiting task to a
 * new parent.
 * @father: the exiting task whose children must be re-parented.
 *
 * Picks a "reaper": the next thread in @father's thread group that is not
 * itself exiting or, if the walk comes back round to @father, whatever
 * task_child_reaper(@father) returns. Every entry on @father->children and
 * @father->ptrace_children is then re-parented under tasklist_lock.
 * Ptraced zombies with exit_signal == -1 are collected on a private list
 * and released after the lock is dropped.
 */
static void forget_original_parent(struct task_struct *father)
{
	struct task_struct *p, *n, *reaper = father;
	struct list_head ptrace_dead;

	INIT_LIST_HEAD(&ptrace_dead);

	write_lock_irq(&tasklist_lock);

	/*
	 * Choose the new parent: walk the thread group, skipping threads
	 * that are already exiting. If the walk wraps back to @father no
	 * sibling thread qualifies, so use task_child_reaper() instead.
	 */
	do {
		reaper = next_thread(reaper);
		if (reaper == father) {
			reaper = task_child_reaper(father);
			break;
		}
	} while (reaper->flags & PF_EXITING);

	/*
	 * There are only two places where our children can be:
	 *
	 * - in our child list
	 * - in our ptraced child list
	 *
	 * Search them and reparent children.
	 */
	list_for_each_entry_safe(p, n, &father->children, sibling) {
		int ptrace = p->ptrace;

		/* if father isn't the real parent, then ptrace must be enabled */
		BUG_ON(father != p->real_parent && !ptrace);

		if (father == p->real_parent) {
			/* reparent with a reaper, real father it's us */
			p->real_parent = reaper;
			reparent_thread(p, father, 0);
		} else {
			/*
			 * We were only tracing this task: unlink it so that
			 * it goes back to its real parent.
			 */
			__ptrace_unlink(p);
			/*
			 * A zombie that changes parent must announce itself
			 * to the new parent.
			 */
			if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 &&
			    thread_group_empty(p))
				do_notify_parent(p, p->exit_signal);
		}

		/*
		 * if the ptraced child is a zombie with exit_signal == -1
		 * we must collect it before we exit, or it will remain
		 * zombie forever since we prevented it from self-reap itself
		 * while it was being traced by us, to be able to see it in wait4.
		 */
		if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1))
			list_add(&p->ptrace_list, &ptrace_dead);
	}

	/*
	 * Entries on father->ptrace_children get the reaper as their new
	 * real parent; reparent_thread(..., 1) handles the ptrace links.
	 * NOTE(review): per the author's note these are our children that
	 * are being traced by another process - confirm.
	 */
	list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) {
		p->real_parent = reaper;
		reparent_thread(p, father, 1);
	}

	write_unlock_irq(&tasklist_lock);
	BUG_ON(!list_empty(&father->children));
	BUG_ON(!list_empty(&father->ptrace_children));

	/* Release every zombie that was collected on ptrace_dead above. */
	list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) {
		list_del_init(&p->ptrace_list);
		release_task(p);
	}
}
这个函数比较简单。转入reparent_thread():
/*
 * reparent_thread - move one child from its dying parent to the new one.
 * @p:      the child being re-parented; p->real_parent has already been
 *          set to the new parent by the caller.
 * @father: the exiting former parent.
 * @traced: non-zero when @p comes from @father's ptrace_children list.
 *
 * Sends @p its pdeath_signal if one is set, fixes up the parent/ptrace
 * list linkage, and, unless the new parent is in the same thread group as
 * @father, forces the exit signal to SIGCHLD, re-notifies for zombies and
 * performs the orphaned-process-group check for @p's group.
 */
static void
reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
{
// The child asked for a signal when its parent dies: deliver it.
if (p->pdeath_signal)
/* We already hold the tasklist_lock here. */
group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
/* Move the child from its dying parent to the new one. */
if (unlikely(traced)) {
/* Preserve ptrace links if someone else is tracing this child. */
list_del_init(&p->ptrace_list);
if (p->parent != p->real_parent)
list_add(&p->ptrace_list, &p->real_parent->ptrace_children);
} else {
/* If this child is being traced, then we're the one tracing it
* anyway, so let go of it.
*/
// Clear the ptrace state and make ->parent the same as ->real_parent
// (the caller has already pointed real_parent at the new parent).
p->ptrace = 0;
remove_parent(p);
p->parent = p->real_parent;
add_parent(p);
if (p->state == TASK_TRACED) {
/*
* If it was at a trace stop, turn it into
* a normal stop since it's no longer being
* traced.
*/
ptrace_untrace(p);
}
}
/* If this is a threaded reparent there is no need to
* notify anyone anything has happened.
*/
// New parent and old parent share a group leader: the child merely moved
// to another thread of the same thread group, so nothing more to do.
if (p->real_parent->group_leader == father->group_leader)
return;
/* We don't want people slaying init. */
// If the child signals its parent on exit at all, force that signal to
// SIGCHLD.
if (p->exit_signal != -1)
p->exit_signal = SIGCHLD;
/* If we'd notified the old parent about this child's death,
* also notify the new parent.
*/
// An untraced zombie with an exit signal and an empty thread group
// announces itself to its new parent.
if (!traced && p->exit_state == EXIT_ZOMBIE &&
p->exit_signal != -1 && thread_group_empty(p))
do_notify_parent(p, p->exit_signal);
/*
* process group orphan check
* Case ii: Our child is in a different pgrp
* than we are, and it was the only connection
* outside, so the child pgrp is now orphaned.
*/
// Child and former parent are in the same session but different
// process groups.
if ((task_pgrp(p) != task_pgrp(father)) &&
(task_session(p) == task_session(father))) {
struct pid *pgrp = task_pgrp(p);
// Per POSIX.1, an orphaned process group that has stopped jobs gets
// SIGHUP and SIGCONT sent to all of its members.
if (will_become_orphaned_pgrp(pgrp, NULL) &&
has_stopped_jobs(pgrp)) {
__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
}
}
}
在这里,有必要了解一下什么叫孤儿进程组。下面的内容摘自《UNIX环境高级编程》:
1:
每个进程组有一个组长进程。组长进程的标识是,其进程组ID等于其进程ID。
进程组组长可以创建一个进程组,创建该组中的进程,然后终止。只要在某个进程组中有
一个进程存在,则该进程组就存在,这与其组长进程是否终止无关。从进程组创建开始到其中
最后一个进程离开为止的时间区间称为进程组的生命期。某个进程组中的最后一个进程可以终
止,也可以参加另一个进程组。
进程调用setpgid可以参加一个现存的组或者创建一个新进程组(下一节中将说明用setsid也
可以创建一个新的进程组)。
2:
对话期(session)是一个或多个进程组的集合。进程调用setsid函数就可建立一个新对话期。
#include <sys/types.h>
#include <unistd.h>
pid_t setsid(void);
返回:若成功则为进程组ID,若出错则为-1
如果调用此函数的进程不是一个进程组的组长,则此函数创建一个新对话期,结果为:
(1) 此进程变成该新对话期的对话期首进程(session leader,对话期首进程是创建该对话期
的进程)。此进程是该新对话期中的唯一进程。
(2) 此进程成为一个新进程组的组长进程。新进程组ID是此调用进程的进程ID。
(3) 此进程没有控制终端(下一节讨论控制终端)。如果在调用setsid之前此进程有一个控
制终端,那么这种联系也被解除。
如果此调用进程已经是一个进程组的组长,则此函数返回出错。为了保证不处于这种情况,
通常先调用fork,然后使其父进程终止,而子进程则继续。因为子进程继承了父进程的进程组ID,
而其进程ID则是新分配的,两者不可能相等,所以这就保证了子进程不是一个进程组的组长。
3:
POSIX.1将孤儿进程组(orphaned process group)定义为:该组中每个成员的父进程或者是该组的一个成员,或者不是该组所属对话期的成员。对孤儿进程组的另一种描述可以是:一个进程组不是孤儿进程组的条件是:该组中有一个进程,其父进程在属于同一对话期的另一个组中。如果进程组不是孤儿进程组,那么在属于同一对话期的另一个组中的父进程就有机会重新起动该组中停止的进程。
在这段代码中,我们首次见到了release_task().这个函数在进程子系统中经常见到,分析如下:
/*
 * release_task - drop the last bookkeeping for a dead task.
 * @p: the task to release.
 *
 * Decrements the owning user's process count, flushes the task's /proc
 * state, unlinks ptrace relationships and signal state under
 * tasklist_lock, and queues the final put_task_struct() through
 * call_rcu(). If @p was the last non-leader thread and the zombie group
 * leader turns out to be self-reaping, the loop repeats once for the
 * leader.
 */
void release_task(struct task_struct * p)
{
struct task_struct *leader;
int zap_leader;
repeat:
// One process fewer is charged to the owning user.
atomic_dec(&p->user->processes);
proc_flush_task(p);
write_lock_irq(&tasklist_lock);
// Break any ptrace relationship; per the author's note this restores
// the real parent as the task's parent (helper not shown).
ptrace_unlink(p);
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
__exit_signal(p);
/*
* If we are the last non-leader member of the thread
* group, and the leader is zombie, then notify the
* group leader's parent process. (if it wants notification.)
*/
zap_leader = 0;
leader = p->group_leader;
if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
// A zombie leader must have an exit signal configured.
BUG_ON(leader->exit_signal == -1);
// Send the leader's exit signal to its parent.
do_notify_parent(leader, leader->exit_signal);
/*
* If we were the last child thread and the leader has
* exited already, and the leader's parent ignores SIGCHLD,
* then we are the one who should release the leader.
*
* do_notify_parent() will have marked it self-reaping in
* that case.
*/
zap_leader = (leader->exit_signal == -1);
}
write_unlock_irq(&tasklist_lock);
release_thread(p);
// Queue delayed_put_task_struct() as an RCU callback; it drops a
// task_struct reference. Per the author's note this is what eventually
// frees the task_struct and its kernel stack.
call_rcu(&p->rcu, delayed_put_task_struct);
// Go round again for the leader when it has to be released as well.
p = leader;
if (unlikely(zap_leader))
goto repeat;
}
delayed_put_task_struct的定义如下:
/*
 * delayed_put_task_struct - RCU-head callback that drops a task reference.
 * @rhp: the rcu_head embedded in a task_struct as its ->rcu member.
 *
 * Recovers the enclosing task_struct from @rhp and hands it to
 * put_task_struct().
 */
static void delayed_put_task_struct(struct rcu_head *rhp)
{
	struct task_struct *task = container_of(rhp, struct task_struct, rcu);

	put_task_struct(task);
}
继续跟踪put_task_struct():
/*
 * put_task_struct - drop one reference on a task_struct.
 * @t: the task whose ->usage count is decremented.
 *
 * When the decrement brings ->usage to zero the task is passed on to
 * __put_task_struct(); otherwise nothing else happens.
 */
static inline void put_task_struct(struct task_struct *t)
{
	/* Other references remain: leave the task alone. */
	if (!atomic_dec_and_test(&t->usage))
		return;

	/* That was the last reference. */
	__put_task_struct(t);
}
__put_task_struct() → free_task():
/*
 * free_task - free the memory backing a task.
 * @tsk: the task being freed.
 *
 * Destroys the per-task dirty-page proportion state, frees the
 * thread_info reached through tsk->stack, runs the rt-mutex debug hook
 * and finally frees the task_struct itself.
 */
void free_task(struct task_struct *tsk)
{
prop_local_destroy_single(&tsk->dirties);
// Free the thread_info (passed as tsk->stack).
free_thread_info(tsk->stack);
rt_mutex_debug_task_free(tsk);
// Free the task_struct; tsk must not be touched after this.
free_task_struct(tsk);
}
如果进程状态不是僵尸状态的话,在这里就会被全部释放掉。
另外,我们在exit_notify()中可以看到对于没有定义进程结束发送信号标志的进程的处理
static void exit_notify(struct task_struct *tsk)
{
/* ……(中间代码省略,完整代码见上文)…… */
if (state == EXIT_DEAD)
release_task(tsk);
}
这样,它不需要等到父进程调用wait()就直接退出了.