分类: LINUX
2009-08-30 16:16:21
-----------------------------------------------------------
本文系本站原创,欢迎转载!
转载请注明出处:http://sjj0412.cublog.cn/
-----------------------------------------------------------
Gdb,只要在linux下编过程序的应该都用过,估计大家也多对这个软件有着好奇的念头。其实gdb本身也不是那么强大,它大部分都是使用linux操作系统所带有的东西实现,其实稍加时间,自己编写一个简易的调试器也不是太难的事。今天就讲下内核中有关调试实现的机制,然后谈下gdb如何使用这些已有的东西达到调试效果以及推广到硬件调试器。
首先让我们想一下,什么是调试,如何达到调试效果?调试有两个目的:
1.让程序在运行到我们想要停的地方停下来:
2.停下来后读取系统的各种信息。
停下来有两种:
一是断点,一是观察点。
如果是硬件调试,这两种非常相似,只不过一种是检测内存读写,一种是检测取指令,从流水线角度看就是:断点在取指阶段,观察点在执行阶段。
如果是软件调试,断点是替换为trap指令,而观察点则不一样的意思了,只是看这个观察点的值,而不是当观察点的值发生变化时停止。
要让程序运行到指定地址时停止执行(对于硬件是指cpu不执行,对于软件是调试程序进程不执行,cpu还是在执行),有两种方式:
一种是利用cpu带有的调试功能,比如x86的eflags寄存器的TF标志位,arm的观察点寄存器组等等。
二是使用软件陷入中断。
X86的调试状态是产生中断,这样在中断处理程序就可以控制调试,而arm的调试状态是将时钟更改为由jtag的dclk提供,而这个jtag的dclk是由我们执行jtag相关操作产生,从而我们能够控制调试。上面的两种方式都是需要实现一个功能,那就是当进入调试状态时能够让我们知道,并且由我们控制,arm进入debug状态时,调试器要轮询debug status register来判断是否进入debug模式了。
事实上对于断点,上面的两种架构都没有采用硬件实现方式,为什么?对于x86,其实只有单步调试机制,没有断点设置机制,这是就得在需要设断点的地方设置int3中断控制程序,对于arm,虽然有断点(观察点)硬件机制,但是进入调试状态后必须由jtag(DCLK)控制,程序无法控制,所以仍然不能使用硬件,使用的是F001 sys_call系统调用。
具体实现是在断点处加入int 3,int 80等trap指令,然后在这些中断的处理程序中给被调试程序发trap信号,这样在中断返回时处理trap信号时,被调试进程会suspend(只要是ptrace状态,所有信号(除了kill信号外)),并通知调试程序(gdb)。
使用TF标志位实现,这个
Tflag寄存器
TF(Trap Flag)——位8,跟踪标志。置1 则开启单步执行调试模式,置0 则关闭。在单步执行模式下,处理器在每条指令后产生一个调试异常,这样在每条指令执行后都可以查看执行程序的状态。如果程序用POPF、POPFD 或者ET 指令设置TF 标志,那么这之后的第一条指令就会产生调试异常。
这样我们只需在调试异常中断时suspend被调试程序,通知调试程序即可实现单步。
由于arm硬件没有单步的实现机制,故还是得像断点一样,将下一次要执行的指令地址设置为断点,这个断点和一般断点的区别是它是临时的,执行完下一指令后不用再恢复为断点,既然要知道下一条指令的地址,就需要有预测机制,即从当前指令判断出下次会执行哪一条指令,由于预测一是涉及到b xx,ld pc等指令还有是条件项,由于判断条件得不偿失,我们一般设置两个断点,即在条件两个分支都设置断点,这样不管条件是什么,都会进入debug。
set_system_intr_gate(3, &int3); /* int3-5 can be called from all */
ENTRY(int3)
pushl $-1 # mark this as an int
SAVE_ALL
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_int3
jmp ret_from_exception
也就是说int 3会进入do_int3
DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
//这个宏就会生成do_int3函数。
#define DO_VM86_ERROR(trapnr, signr, str, name) \
fastcall void do_##name(struct pt_regs * regs, long error_code) \
{ \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
return; \
do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
}
do_trap会给被调试进程发SIGTRAP信号:
static void do_trap(int trapnr, int signr, char *str, int vm86,
struct pt_regs * regs, long error_code, siginfo_t *info)
{
struct task_struct *tsk = current;
tsk->thread.error_code = error_code;
tsk->thread.trap_no = trapnr;
if (regs->eflags & VM_MASK) {
if (vm86)
goto vm86_trap;
goto trap_signal;
}
if (!user_mode(regs))
goto kernel_trap;
trap_signal: {
if (info)
force_sig_info(signr, info, tsk);
else
force_sig(signr, tsk);
return;
//给被调试进程发sigtrap信号
}
kernel_trap: {
if (!fixup_exception(regs))
die(str, regs, error_code);
return;
}
vm86_trap: {
int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr);
if (ret) goto trap_signal;
return;
}
}
然后调试中断返回时执行下面这个信号处理函数(每次从内核态到用户态会执行):
int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
{
siginfo_t info;
int signr;
struct k_sigaction ka;
/*
* We want the common case to go fast, which
* is why we may in certain cases get here from
* kernel mode. Just return without doing anything
* if so.
*/
if (!user_mode(regs))
return 1;
if (try_to_freeze())
goto no_signal;
if (!oldset)
oldset = ¤t->blocked;
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
/* Reenable any watchpoints before delivering the
* signal to user space. The processor register will
* have been cleared if the watchpoint triggered
* inside the kernel.
*/
if (unlikely(current->thread.debugreg[7])) {
set_debugreg(current->thread.debugreg[7], 7);
}
/* Whee! Actually deliver the signal. */
return handle_signal(signr, &info, &ka, oldset, regs);
}
no_signal:
/* Did we come from a system call? */
if (regs->orig_eax >= 0) {
/* Restart the system call - no handlers present */
if (regs->eax == -ERESTARTNOHAND ||
regs->eax == -ERESTARTSYS ||
regs->eax == -ERESTARTNOINTR) {
regs->eax = regs->orig_eax;
regs->eip -= 2;
}
if (regs->eax == -ERESTART_RESTARTBLOCK){
regs->eax = __NR_restart_syscall;
regs->eip -= 2;
}
}
return 0;
}
int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
struct pt_regs *regs, void *cookie)
{
sigset_t *mask = ¤t->blocked;
int signr = 0;
relock:
spin_lock_irq(¤t->sighand->siglock);
for (;;) {
struct k_sigaction *ka;
if (unlikely(current->signal->group_stop_count > 0) &&
handle_group_stop())
goto relock;
signr = dequeue_signal(current, mask, info);
if (!signr)
break; /* will return 0 */
if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
ptrace_signal_deliver(regs, cookie);
/* Let the debugger run. */
ptrace_stop(signr, signr, info);
//这个停止被调试进程,通知调试进程(gdb)
/* We're back. Did the debugger cancel the sig? */
signr = current->exit_code;
if (signr == 0)
continue;
current->exit_code = 0;
/* Update the siginfo structure if the signal has
changed. If the debugger wanted something
specific in the siginfo structure then it should
have updated *info via PTRACE_SETSIGINFO. */
if (signr != info->si_signo) {
info->si_signo = signr;
info->si_errno = 0;
info->si_code = SI_USER;
info->si_pid = current->parent->pid;
info->si_uid = current->parent->uid;
}
/* If the (new) signal is now blocked, requeue it. */
if (sigismember(¤t->blocked, signr)) {
specific_send_sig_info(signr, info, current);
continue;
}
}
ka = ¤t->sighand->action[signr-1];
if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */
continue;
if (ka->sa.sa_handler != SIG_DFL) {
/* Run the handler. */
*return_ka = *ka;
if (ka->sa.sa_flags & SA_ONESHOT)
ka->sa.sa_handler = SIG_DFL;
break; /* will return non-zero "signr" value */
}
......................
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
}
ENTRY(vector_swi)
save_user_regs
zero_fp
get_scno
arm710_bug_check scno, ip
#ifdef CONFIG_ALIGNMENT_TRAP
ldr ip, __cr_alignment
ldr ip, [ip]
mcr p15, 0, ip, c1, c0 @ update control register
#endif
enable_irqs ip
str r4, [sp, #-S_OFF]! @ push fifth arg
get_current_task tsk
ldr ip, [tsk, #TSK_PTRACE] @ check for syscall tracing
bic scno, scno, #0xff000000 @ mask off SWI op-code
eor scno, scno, #OS_NUMBER << 20 @ check OS number
adr tbl, sys_call_table @ load syscall table pointer
tst ip, #PT_TRACESYS @ are we tracing syscalls?
bne __sys_trace
adrsvc al, lr, ret_fast_syscall @ return address
cmp scno, #NR_syscalls @ check upper syscall limit
//系统调用号是否大于NR_syscalls =256,如果是则调用
ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine
add r1, sp, #S_OFF
2: mov why, #0 @ no longer a real syscall
cmp scno, #ARMSWI_OFFSET
eor r0, scno, #OS_NUMBER << 20 @ put OS number back
bcs SYMBOL_NAME(arm_syscall)
b SYMBOL_NAME(sys_ni_syscall) @ not private func
/*
* This is the really slow path. We're going to be doing
* context switches, and waiting for our parent to respond.
*/
由于断点的trap号为
#define __ARM_NR_BASE (__NR_SYSCALL_BASE+0x0f0000)
#define __ARM_NR_breakpoint (__ARM_NR_BASE+1)
明显大于_NR_SYSCALL_BASE,故会执行下面的bcs SYMBOL_NAME(arm_syscall)
asmlinkage int arm_syscall(int no, struct pt_regs *regs)
{
siginfo_t info;
if ((no >> 16) != 0x9f)
return bad_syscall(no, regs);
switch (no & 0xffff) {
case 0: /* branch through 0 */
info.si_signo = SIGSEGV;
info.si_errno = 0;
info.si_code = SEGV_MAPERR;
info.si_addr = NULL;
force_sig_info(SIGSEGV, &info, current);
die_if_kernel("branch through zero", regs, 0);
return 0;
case NR(breakpoint): /* SWI BREAK_POINT */
/*
* The PC is always left pointing at the next
* instruction. Fix this.
*/
regs->ARM_pc -= 4;
__ptrace_cancel_bpt(current);
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_BRKPT;
info.si_addr = (void *)instruction_pointer(regs) -
(thumb_mode(regs) ? 2 : 4);
force_sig_info(SIGTRAP, &info, current);
return regs->ARM_r0;
...........................
}
也是给被调试进程发SIGTRAP信号:
然后调试陷入中断返回时会调用如下函数:
static int do_signal(sigset_t *oldset, struct pt_regs *regs, int syscall)
{
struct k_sigaction ka;
siginfo_t info;
int signr;
/*
* We want the common case to go fast, which
* is why we may in certain cases get here from
* kernel mode. Just return without doing anything
* if so.
*/
if (!user_mode(regs))
return 0;
if (try_to_freeze())
goto no_signal;
if (current->ptrace & PT_SINGLESTEP)
ptrace_cancel_bpt(current);
//取消这条指令的断点,并恢复,比如mov r1,r0,当我们执行singlestep,其实是把这个指令保存在thread.bt[]里,然后将 SWI BREAK_POINT放到里面,所以这是要恢复以边运行这条指令
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
//这条指令会是被调试程序suspend,并通知调试进程gdb.
if (signr > 0) {
handle_signal(signr, &ka, &info, oldset, regs, syscall);
if (current->ptrace & PT_SINGLESTEP)
ptrace_set_bpt(current);
//根据分支预测设置下一个临时断点。
return 1;
}
int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
struct pt_regs *regs, void *cookie)
{
sigset_t *mask = ¤t->blocked;
int signr = 0;
relock:
spin_lock_irq(¤t->sighand->siglock);
for (;;) {
struct k_sigaction *ka;
if (unlikely(current->signal->group_stop_count > 0) &&
handle_group_stop())
goto relock;
signr = dequeue_signal(current, mask, info);
if (!signr)
break; /* will return 0 */
if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
ptrace_signal_deliver(regs, cookie);
/* Let the debugger run. */
ptrace_stop(signr, signr, info);
/* We're back. Did the debugger cancel the sig? */
signr = current->exit_code;
if (signr == 0)
continue;
current->exit_code = 0;
/* Update the siginfo structure if the signal has
changed. If the debugger wanted something
specific in the siginfo structure then it should
have updated *info via PTRACE_SETSIGINFO. */
if (signr != info->si_signo) {
info->si_signo = signr;
info->si_errno = 0;
info->si_code = SI_USER;
info->si_pid = current->parent->pid;
info->si_uid = current->parent->uid;
}
/* If the (new) signal is now blocked, requeue it. */
if (sigismember(¤t->blocked, signr)) {
specific_send_sig_info(signr, info, current);
continue;
}
}
ka = ¤t->sighand->action[signr-1];
if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */
continue;
if (ka->sa.sa_handler != SIG_DFL) {
/* Run the handler. */
*return_ka = *ka;
if (ka->sa.sa_flags & SA_ONESHOT)
ka->sa.sa_handler = SIG_DFL;
break; /* will return non-zero "signr" value */
}
...................
}
static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
{
/*
* If there is a group stop in progress,
* we must participate in the bookkeeping.
*/
if (current->signal->group_stop_count > 0)
--current->signal->group_stop_count;
current->last_siginfo = info;
current->exit_code = exit_code;
/* Let the debugger run. */
set_current_state(TASK_TRACED);
//这样被调试进程就不在running状态
spin_unlock_irq(¤t->sighand->siglock);
read_lock(&tasklist_lock);
if (likely(current->ptrace & PT_PTRACED) &&
likely(current->parent != current->real_parent ||
!(current->ptrace & PT_ATTACHED)) &&
(likely(current->parent->signal != current->signal) ||
!unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) {
do_notify_parent_cldstop(current, current->parent,
CLD_TRAPPED);
read_unlock(&tasklist_lock);
schedule();//被调试进程肯定阻塞。
} else {
/*
* By the time we got the lock, our tracer went away.
* Don't stop here.
*/
read_unlock(&tasklist_lock);
set_current_state(TASK_RUNNING);
current->exit_code = nostop_code;
}
...............................
}