前一阵子搞动态替换内核函数,过程十分痛苦,特写此文,纪念那些该死的Oops
一、源代码文件
forward.c 驱动模块的文件
-
/* Number of bytes saved from, and later written over, the start of the target function. */
#define CODESIZE 4
-
-
/*
 * Pointer to the function that is going to be patched, initialised from the
 * hard-coded address 0xc01f5f00.
 * NOTE(review): presumably the address of the kernel's rtc_dev_open for one
 * specific kernel build -- confirm against that kernel's symbol table.
 */
int (* orig_rtc_dev_open) (struct inode *inode, struct file *file) =
-
( int(*)(struct inode *inode, struct file *file))0xc01f5f00;
-
-
/*
 * Byte-wise copy of `size` bytes from `src` to `dest`, lowest address first.
 *
 * dest: destination buffer, written through a char pointer.
 * src:  source buffer, read through a const char pointer.
 * size: number of bytes to copy; nothing is copied when size <= 0.
 *
 * Returns `dest`. No validation of either pointer is performed, so a bad
 * `src` or `dest` faults inside the copy loop.
 */
void* my_memcpy (void *dest, const void *src, int size)
-
{
-
const char *p = src;
-
char *q = dest;
-
int i;
-
/* Copy one byte per iteration, advancing both cursors. */
for (i=0; i<size; i++) *q++ = *p++;
-
return dest;
-
}
-
-
/*
 * Replacement routine with the same signature as orig_rtc_dev_open.
 * It ignores both arguments, logs a fixed marker string via printk and
 * returns 0.
 */
int my_rtc_dev_open(struct inode *inode, struct file *file)
-
{
-
printk("leonlalal\n");
-
return 0;
-
}
-
-
/*
 * Build a 32-bit word from the address of my_rtc_dev_open, dump and save the
 * first bytes of the function at orig_rtc_dev_open, then overwrite those
 * bytes. The dump, save and overwrite run between lock_kernel() and
 * unlock_kernel(). Always returns 0.
 */
int forward_init(void)
-
{
-
uint32_t addr = 0;
-
int i;
-
/* Save area for the original bytes; in this listing it is local to forward_init. */
char rtc_dev_open_buf[CODESIZE];
-
-
addr = (uint32_t)my_rtc_dev_open;
-
-
/* ~0xfe000003 == 0x01fffffc: keep bits 2..24 of the address shifted left by 2. */
addr = (addr<<2) & ~0xfe000003 ;
-
-
/*
 * NOTE(review): 0x48000000 is presumably the PowerPC branch opcode, making
 * `addr` the instruction word to be written -- confirm the shift and the
 * relative/absolute form against the ISA.
 */
addr |= 0x48000000;
-
printk("fuc %08x\n", addr);
-
-
lock_kernel();
-
-
/* Dump CODESIZE consecutive 32-bit words starting at the target function. */
for(i = 0; i< CODESIZE; i++){
-
printk("%08x \n", *((uint32_t *)orig_rtc_dev_open + i));
-
}
-
-
//kernel code ---> buf
/* Save the first CODESIZE bytes of the target function before patching. */
-
my_memcpy(rtc_dev_open_buf, (char *)orig_rtc_dev_open, CODESIZE);
-
-
-
printk("rtc_dev_open_buf[%08x]\n", *((uint32_t *)rtc_dev_open_buf));
-
-
//new jump code ---> kernel code
/*
 * The source argument is the VALUE of addr cast to a pointer, so my_memcpy
 * reads from the address held in addr instead of from the variable itself.
 * The article traces the Oops to this call and corrects it to (char *)&addr.
 */
-
my_memcpy(orig_rtc_dev_open, (char*)addr, CODESIZE);
-
-
unlock_kernel();
-
return 0;
-
}
-
-
/*
 * Undo the patch: copy the CODESIZE saved bytes from rtc_dev_open_buf back
 * over the start of the function at orig_rtc_dev_open.
 * NOTE(review): in this listing rtc_dev_open_buf is declared inside
 * forward_init, so it is not in scope here; the real file presumably
 * declares it at file scope -- confirm.
 */
void forward_exit (void)
-
{
-
//buf ---> kernel code
-
my_memcpy(orig_rtc_dev_open, rtc_dev_open_buf, CODESIZE);
-
}
二、看Oops
加载后通过ioctl执行forward_init函数,出现oops
-
-bash-3.2# dmesg -c
-
dbg exit now
-
the major device No. is 215
-
fuc 4818a900
-
9421ffe0
-
7c0802a6
-
90010024
-
bfa10014
-
rtc_dev_open_buf[9421ffe0]
-
Oops: kernel access of bad area, sig: 11 [#1]
-
PREEMPT
-
NIP: f1062a28 LR: f1062b0c CTR: 00000004
-
REGS: efec5de0 TRAP: 0300 Not tainted (2.6.24.2)
-
MSR: 00029000 CR: 44000422 XER: 00000000
-
DEAR: 4818a900, ESR: 00000000
-
TASK = efea7aa0[1243] 'my_dump' THREAD: efec4000
-
GPR00: f1062afc efec5e90 efea7aa0 c01f5f00 4818a900 00000004 00000000 c033d130 第三步
-
GPR08: 00000000 c01f5f00 00000000 c0340000 24000482 100191e8 00000240 00000000
-
GPR16: 1010f6b0 1009ea94 100f0000 100f7ac8 00000000 1010f540 100ff0b8 00000000
-
GPR24: 3000e86c 00000002 7fefbc24 0000002e 4818a900 f1060000 f1060000 f1060000
-
NIP [f1062a28] my_memcpy+0x10/0x28 [enetdbg] 第二步
-
LR [f1062b0c] forward_init+0xa0/0xbc [enetdbg]
-
Call Trace:
-
[efec5e90] [f1062afc] forward_init+0x90/0xbc [enetdbg] (unreliable) 第一步
-
[efec5eb0] [f106011c] dbg_ioctl+0x11c/0x148 [enetdbg]
-
[efec5ec0] [c0086d04] do_ioctl+0x84/0xc0
-
[efec5ee0] [c0086df8] vfs_ioctl+0xb8/0x448
-
[efec5f10] [c00871c8] sys_ioctl+0x40/0x74
-
[efec5f40] [c0002340] ret_from_syscall+0x0/0x3c
-
Instruction dump:
-
409dffac 387a45c0 48000161 80010034 bb210014 38210030 7c0803a6 4e800020
-
2c050000 7c691b78 4c810020 7ca903a6 <88040000> 38840001 98090000 39290001 第四步
-
-bash-3.2#
1. 先看
Call Trace, 知道是死在了
forward_init()里
2. 再看
NIP(Next Instruction Pointer):
出错时,下一条要执行的指令的地址为
[f1062a28] my_memcpy+0x10/0x28 [enetdbg]
[f1062a28] 出错指令的绝对地址
my_memcpy+0x10/0x28 出错指令在
my_memcpy函数中的偏移量为
0x10, 函数总长度为
0x28
[enetdbg] 出错指令的所属模块
3. 看看GPR(General Purpose Register):
通用寄存器,其中 r3-r10是用来放形参的
r3 c01f5f00 对应第一个形参 orig_rtc_dev_open,存放指令的地址
r4 4818a900 对应第二个形参 addr ,即新指令的值,通过打印信息确定 //错误就在这
r5 00000004 对应第三个形参 CODESIZE
现在基本确认是死在了 forward_init() ---> my_memcpy() 里
4. 死在了哪条语句上呢,就得看 Instruction dump
我们看到执行的最后几条指令是
<88040000> 38840001 98090000 39290001
怎么找这几条指令的对应的C代码呢,就需要反汇编了
三、反汇编找C代码
前面forward.c 编译后生成了 forward.o
对其进行反汇编 ppc/85xx/bin/ppc_85xx-objdump -S forward.o > forward_debug.txt
看看forward_debug.txt的内容
-
forward.o: 文件格式 elf32-powerpc
-
-
反汇编 .text 节:
-
-
00000000 <my_memcpy>:
-
{
-
const char *p = src;
-
char *q = dest;
-
int i;
-
for (i=0; i<size; i++) *q++ = *p++;
-
0: 2c 05 00 00 cmpwi r5,0
-
4: 7c 69 1b 78 mr r9,r3
-
8: 4c 81 00 20 blelr
-
c: 7c a9 03 a6 mtctr r5
-
10: 88 04 00 00 lbz r0,0(r4)
-
14: 38 84 00 01 addi r4,r4,1
-
18: 98 09 00 00 stb r0,0(r9)
-
1c: 39 29 00 01 addi r9,r9,1
-
20: 42 00 ff f0 bdnz+ 10 <my_memcpy+0x10>
-
return dest;
-
}
-
24: 4e 80 00 20 blr
-
-
00000028 <my_rtc_dev_open>:
-
-
int my_rtc_dev_open(struct inode *inode, struct file *file)
-
{
-
28: 94 21 ff f0 stwu r1,-16(r1)
-
printk("leonlalal\n");
-
2c: 3c 60 00 00 lis r3,0
-
30: 7c 08 02 a6 mflr r0
-
34: 38 63 00 00 addi r3,r3,0
-
38: 90 01 00 14 stw r0,20(r1)
-
3c: 48 00 00 01 bl 3c <my_rtc_dev_open+0x14>
-
return 0;
-
}
-
40: 80 01 00 14 lwz r0,20(r1)
-
44: 38 60 00 00 li r3,0
-
48: 38 21 00 10 addi r1,r1,16
-
4c: 7c 08 03 a6 mtlr r0
-
50: 4e 80 00 20 blr
-
-
00000054 <forward_init>:
-
-
int forward_init(void)
-
{
-
54: 94 21 ff e0 stwu r1,-32(r1)
-
uint32_t addr = 0;
-
int i;
-
-
addr = (uint32_t)my_rtc_dev_open;
-
58: 3d 20 00 00 lis r9,0
-
-
addr = (addr<<2) & ~0xfe000003 ;
-
-
addr |= 0x48000000;
-
printk("fuc %08x\n", addr);
-
5c: 3c 60 00 00 lis r3,0
-
60: 7c 08 02 a6 mflr r0
-
64: 38 63 00 0c addi r3,r3,12
-
68: bf 81 00 10 stmw r28,16(r1)
-
6c: 3b 89 00 00 addi r28,r9,0
-
70: 57 9c 11 fa rlwinm r28,r28,2,7,29
-
74: 90 01 00 24 stw r0,36(r1)
-
78: 67 9c 48 00 oris r28,r28,18432
-
-
lock_kernel();
-
-
for(i = 0; i< CODESIZE; i++){
-
7c: 3b a0 00 00 li r29,0
-
80: 7f 84 e3 78 mr r4,r28
-
84: 4c c6 31 82 crclr 4*cr1+eq
-
88: 48 00 00 01 bl 88 <forward_init+0x34>
-
8c: 3f c0 00 00 lis r30,0
-
90: 48 00 00 01 bl 90 <forward_init+0x3c>
-
94: 3f e0 00 00 lis r31,0
-
printk("%08x \n", *((uint32_t *)orig_rtc_dev_open + i));
-
98: 81 7f 00 00 lwz r11,0(r31)
-
9c: 57 a9 10 3a rlwinm r9,r29,2,0,29
-
a0: 38 7e 00 18 addi r3,r30,24
-
a4: 3b bd 00 01 addi r29,r29,1
-
a8: 7c 89 58 2e lwzx r4,r9,r11
-
ac: 4c c6 31 82 crclr 4*cr1+eq
-
b0: 48 00 00 01 bl b0 <forward_init+0x5c>
-
b4: 2f 9d 00 03 cmpwi cr7,r29,3
-
b8: 40 9d ff e0 ble+ cr7,98 <forward_init+0x44>
-
}
-
-
//kernel code ---> buf
-
my_memcpy(rtc_dev_open_buf, (char *)orig_rtc_dev_open, CODESIZE);
-
bc: 80 9f 00 00 lwz r4,0(r31)
-
c0: 3f a0 00 00 lis r29,0
-
c4: 38 a0 00 04 li r5,4
-
c8: 38 7d 00 00 addi r3,r29,0
-
cc: 48 00 00 01 bl cc <forward_init+0x78>
-
-
-
printk("rtc_dev_open_buf[%08x]\n", *((uint32_t *)rtc_dev_open_buf));
-
d0: 80 9d 00 00 lwz r4,0(r29)
-
d4: 3c 60 00 00 lis r3,0
-
d8: 38 63 00 20 addi r3,r3,32
-
dc: 4c c6 31 82 crclr 4*cr1+eq
-
e0: 48 00 00 01 bl e0 <forward_init+0x8c>
-
-
//new jump code ---> kernel code
-
my_memcpy(orig_rtc_dev_open, (char*)addr, CODESIZE);
-
e4: 80 7f 00 00 lwz r3,0(r31)
-
e8: 7f 84 e3 78 mr r4,r28
-
ec: 38 a0 00 04 li r5,4
-
f0: 48 00 00 01 bl f0 <forward_init+0x9c>
-
-
unlock_kernel();
-
f4: 48 00 00 01 bl f4 <forward_init+0xa0>
-
return 0;
-
}
-
f8: 80 01 00 24 lwz r0,36(r1)
-
fc: 38 60 00 00 li r3,0
-
100: bb 81 00 10 lmw r28,16(r1)
-
104: 7c 08 03 a6 mtlr r0
-
108: 38 21 00 20 addi r1,r1,32
-
10c: 4e 80 00 20 blr
-
-
00000110 <forward_exit>:
-
-
void forward_exit (void)
-
{
-
//buf ---> kernel code
-
my_memcpy(orig_rtc_dev_open, rtc_dev_open_buf, CODESIZE);
-
110: 3d 20 00 00 lis r9,0
-
114: 3c 80 00 00 lis r4,0
-
118: 80 69 00 00 lwz r3,0(r9)
-
11c: 38 84 00 00 addi r4,r4,0
-
120: 38 a0 00 04 li r5,4
-
124: 48 00 00 00 b 124 <forward_exit+0x14>
在反汇编结果中搜索
<88040000> 38840001 98090000 39290001 这几条指令(对应 my_memcpy 偏移 0x10 处的 lbz r0,0(r4))
原来是死在了my_memcpy里的赋值语句上。
赋值操作应该没有错,那就是my_memcpy 形参错了,最后定位到
-
//new jump code ---> kernel code
-
my_memcpy(orig_rtc_dev_open, (char*)addr, CODESIZE);
我擦,addr丢了一个&,改之
my_memcpy
(orig_rtc_dev_open
, (char
*)&addr
, CODESIZE
);
---------------------------------------------------------------------------------------------
后话:
编译,运行,那就是另外一个Oops了,不是死在自己的模块函数里了,死在内核函数里了。
反汇编找C代码的时候更麻烦了,不过套路是一样的,反汇编内核就
ppc/85xx/bin/ppc_85xx-objdump -S vmlinux | less
有些时候,Oops的Instruction dump里全是xxxx,那你就得详细的查查看LR,CTR什么的是不是错了,多半你的代码跳到了一个非法的地址。
四、arm的oops调试(没地儿放,就这了吧)
-
[root@leonwang]# mmcconfig -a
-
Unable to handle kernel NULL pointer dereference at virtual address 00000048
-
pgd = dfbac000
-
[00000048] *pgd=1fadb831, *pte=00000000, *ppte=00000000
-
Internal error: Oops: 817 [#1] PREEMPT SMP
-
Modules linked in: mmc_drv(O)
-
CPU: 0 Tainted: G O (3.3.0-14.2-build1 #58)
-
PC is at xilinx_prepare_hs_sdr+0x30/0xb8
-
LR is at xilinx_prepare_hs_sdr+0x28/0xb8
-
pc : [<c02a74d8>] lr : [<c02a74d0>] psr: 60000013
-
sp : dfae3ec0 ip : dfae2000 fp : 00000000
-
r10: 00000000 r9 : dfae2000 r8 : 00000000
-
r7 : dfbed400 r6 : 00000000 r5 : df871280 r4 : df871000
-
r3 : 00000000 r2 : 00000000 r1 : 017d7840 r0 : 00000006
-
Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user
-
Control: 18c5387d Table: 1fbac04a DAC: 00000015
-
Process mmcconfig (pid: 555, stack limit = 0xdfae22f0)
-
Stack: (0xdfae3ec0 to 0xdfae4000)
-
3ec0: bef41d20 df871000 dfae2000 bf000b0c df030dac dfae3f78 00000001 df9cd000
-
3ee0: ffffff9c c0015d28 00000000 00000007 c04c6084 b6e9a670 00000006 b6f39000
-
3f00: df5704a8 bef41d20 dfa68a40 00000003 c0015d28 c00a9760 00005452 c00aa1e0
-
3f20: 00000000 00000000 df993240 0000001b 00000000 c008a678 dfae13c8 00000000
-
3f40: 00000000 00000000 00000002 00000003 00000003 c0099b98 00000000 00000003
-
3f60: 00000003 dfa68a40 bef41d20 c0084502 00000003 c0015d28 dfae2000 00000000
-
3f80: 00000000 c00aa25c 00000003 00000000 bef41d20 00015a3c 00015a1c 00015a28
-
3fa0: 00000036 c0015b80 00015a3c 00015a1c 00000003 c0084502 bef41d20 00015a60
-
3fc0: 00015a3c 00015a1c 00015a28 00000036 00000000 00000000 00015a34 00000000
-
3fe0: bef41e94 bef41d18 0000c150 b6e9a67c 60000010 00000003 f7fff3ff 6f7ddfff
-
[<c02a74d8>] (xilinx_prepare_hs_sdr+0x30/0xb8) from [<bf000b0c>] (emmc_test_ioctl+0x210/0x2e4 [mmc_drv])
-
[<bf000b0c>] (emmc_test_ioctl+0x210/0x2e4 [mmc_drv]) from [<c00a9760>] (vfs_ioctl+0x24/0x40)
-
[<c00a9760>] (vfs_ioctl+0x24/0x40) from [<c00aa1e0>] (do_vfs_ioctl+0x4cc/0x514)
-
[<c00aa1e0>] (do_vfs_ioctl+0x4cc/0x514) from [<c00aa25c>] (sys_ioctl+0x34/0x54)
-
[<c00aa25c>] (sys_ioctl+0x34/0x54) from [<c0015b80>] (ret_fast_syscall+0x0/0x30)
-
Code: 0a000000 e12fff33 e3a00006 e59f1074 (e5860048)
-
---[ end trace 901cac0a83592b19 ]---
-
Segmentation fault
-
[root@leonwang]#
看来是内核built-in 驱动函数xilinx_prepare_hs_sdr() 出bug了
-
#arm-linux-objdump -S sdhci_xilinx.o >> tmp_file
-
#cat tmp_file
-
..........略
-
-
710 00000a7c <xilinx_prepare_hs_sdr>:
-
711 a7c: e92d4070 push {r4, r5, r6, lr}
-
712 a80: e1a04000 mov r4, r0
-
713 a84: e2805d0a add r5, r0, #640 ; 0x280
-
714 a88: e5906290 ldr r6, [r0, #656]
-
715 a8c: f57ff04f dsb sy
-
716 a90: e59f3088 ldr r3, [pc, #136] ; b20 <xilinx_prepare_hs_sdr+0xa4>
-
717 a94: e5933018 ldr r3, [r3, #24]
-
718 a98: e3530000 cmp r3, #0 ; 0x0
-
719 a9c: 0a000000 beq aa4 <xilinx_prepare_hs_sdr+0x28>
-
720 aa0: e12fff33 blx r3
-
721 aa4: e3a00006 mov r0, #6 ; 0x6
-
722 aa8: e59f1074 ldr r1, [pc, #116] ; b24 <xilinx_prepare_hs_sdr+0xa8>
-
723 aac: e5860048 str r0, [r6, #72]
-
724 ab0: e1a00004 mov r0, r4
-
725 ab4: ebfffffe bl 2a0 <xilinx_set_clk>
-
726 ab8: e3a0400b mov r4, #11 ; 0xb
-
727 abc: ea000001 b ac8 <xilinx_prepare_hs_sdr+0x4c>
-
728 ac0: e59f0060 ldr r0, [pc, #96] ; b28 <xilinx_prepare_hs_sdr+0xac>
-
729 ac4: ebfffffe bl 0 <__const_udelay>
-
730 ac8: e2544001 subs r4, r4, #1 ; 0x1
-
731 acc: 1afffffb bne ac0 <xilinx_prepare_hs_sdr+0x44>
找到 xilinx_prepare_hs_sdr+0x30 处
xilinx_prepare_hs_sdr 是 00000a7c
xilinx_prepare_hs_sdr+0x30 是 00000aac
00000aac处指令为 e5860048, 与上面oops日志
Code: 0a000000 e12fff33 e3a00006 e59f1074 (e5860048)
相对应
下面对应的是驱动中的C代码
-
42 #define XILINX_EMMC_DMA_RD_LEN 0x40
-
43 #define XILINX_EMMC_DMA_WR_LEN 0x44
-
44 #define XILINX_EMMC_CFG 0x48
-
-
........
-
-
87 //XXX "XILINX_EMMC_CFG" field
-
88
-
89 #define CFG_DDR_ENABLE 0x00000001 // 0.SDR 1.DDR
-
90 #define CFG_DATA_CLK 0x00000002 // 0.400k 1.data transfer clock
-
91 #define CFG_DMA_ENABLE 0x00000004 // 0.disable 1.enable
-
92 #define CFG_HS400_ENABLE 0x00000008 // 0.disable 1.enable
-
-
........
-
-
418 void xilinx_prepare_hs_sdr(struct mmc_host *mmc)
-
419 {
-
420 struct xilinx_emmc_host *host = mmc_priv(mmc);
-
421 uint8_t *base_reg = host->base_reg;
-
422 uint32_t tmp = 0;
-
423 int i = 0;
-
424
-
425 // xilinx host enter hs sdr mode
-
426 if( xilinx_use_dma )
-
427 tmp |= CFG_DMA_ENABLE;
-
428
-
429 writel( tmp | CFG_DATA_CLK, base_reg + XILINX_EMMC_CFG);
-
430
-
431 xilinx_set_clk(mmc, 25000000); // 25MHz
-
432 mdelay(10);
-
433
-
434
-
435 // xilinx host only support 8-bit bus width(single date rate)
-
436 tmp = (MMC_SWITCH_MODE_WRITE_BYTE << 24) | // write
-
437 (EXT_CSD_BUS_WIDTH << 16) | // index
-
438 (2 << 8); // value
-
439
-
440 xilinx_send_pri_cmd(host,MMC_SWITCH, tmp , MMC_RSP_R1);
-
441
-
442 for(i = 0; i < XILINX_RETRY_MAX; i++){
-
443 tmp = xilinx_send_pri_cmd(host,MMC_SEND_STATUS, 0x00010000, MMC_RSP_R1 );
-
444 if(R1_CURRENT_STATE(tmp) != R1_STATE_PRG)
-
445 break;
-
446 }
-
447
-
448 if(i == XILINX_RETRY_MAX)
-
449 printk("send pri switch EXT_CSD_BUS_WIDTH error [%08x]\n", tmp);
-
450
-
451 return ;
-
452 }
汇编中的
-
721 aa4: e3a00006 mov r0, #6 ; 0x6
-
722 aa8: e59f1074 ldr r1, [pc, #116] ; b24 <xilinx_prepare_hs_sdr+0xa8>
-
723 aac: e5860048 str r0, [r6, #72]
对应C中的
-
425 // xilinx host enter hs sdr mode
-
426 if( xilinx_use_dma )
-
427 tmp |= CFG_DMA_ENABLE;
-
428
-
429 writel( tmp | CFG_DATA_CLK, base_reg + XILINX_EMMC_CFG);
因为tmp最终值为6 ( 宏 CFG_DMA_ENABLE | CFG_DATA_CLK的值为6)
宏XILINX_EMMC_CFG 的值为0x48
汇编代码中 其实执行的是 writel(6, base_reg + 0x48),而出错的访问地址正是 0x00000048(寄存器 r6 为 0)
即base_reg为0,异常。 排查得知xilinx_prepare_hs_sdr传入的形参错误,导致host指针错误
阅读(1526) | 评论(0) | 转发(0) |