分类: 嵌入式
2016-05-27 16:49:14
内核oops如下:
Unable to handle kernel paging request for data at address 0xc00000200081a940
//错误发生地址
Faulting instruction address: 0x800000000517ae40
Oops: Kernel access of bad area, sig: 11 [#1]
//内核关于preempt、smp的配置:PREEMPT表示内核可抢占,SMP表示内核支持对称多处理(多核)
// NR_CPUS是个宏定义,可以在config中配置(CONFIG_NR_CPUS),表示系统中CPU的最大数量
PREEMPT SMP NR_CPUS=24 CoreNet Generic
//加载了的模块列表
Modules linked in: ossmod(O) tipc(O) bcmapi(O) linux_bcm_net(O) linux_bcm_diag(O) linux_bcm_core(O) linux_uk_proxy(O) linux_kernel_bde(O) robo_spi(O)
//错误发生的cpu编号为1,发生错误进程pid为3711,进程名为SCHE7_1
//内核污染标志:Tainted: G W O(G表示已加载的模块均为GPL兼容许可证,W表示内核此前发出过警告,O表示加载了树外模块),内核版本为3.10.55-EMBSYS-CGEL
CPU: 1 PID: 3711 Comm: SCHE7_1 Tainted: G W O 3.10.55-EMBSYS-CGEL-5.01.30.P0.B4-svn52164 #1
task: c000000140ce0080 ti: c000000140cec000 task.ti: c000000140cec000
NIP: 800000000517ae40 LR: 800000000517add0 CTR: 0000000000000000
REGS: c000000140cef1d0 TRAP: 0300 Tainted: G W O (3.10.55-EMBSYS-CGEL-5.01.30.P0.B4-svn52164)
MSR: 0000000080029000
SOFTE: 1
DEAR: c00000200081a940, ESR: 0000000000000000
GPR00: 000000000000000a c000000140cef450 80000000051ad920 80000000051ad920
GPR04: c000000140cef657 0000000000000038 80000000051a7c2d 0000000000000003
GPR08: 80000000051ad920 c00000200081a938 c00000000081a958 80000000051ad920
GPR12: 800000000519c350 c00000000fff4780 00000000000000ff 00000000000000ff
GPR16: 0000000000000000 000000000000d880 0000000000404007 0000000000000000
GPR20: 00000000000000d8 0000000000000080 000000001026ada8 000000001026a638
GPR24: 00000000102554f0 00003fff8988c228 0000000000001142 0000000000000ebe
GPR28: 0000000000000038 0000000000000000 00000000ffffffff c000000140cef950
NIP [800000000517ae40] .BCMX+0xa8fc/0x13dc4 [bcmapi]
/*由此可知,出错点位于BCMX函数起始地址偏移0xa8fc字节处,该函数体大小为0x13dc4字节*/
LR [800000000517add0] .BCMX+0xa88c/0x13dc4 [bcmai]
Call Trace:
[c000000140cef450] [800000000517ad98] .BCMXGSControl+0xa854/0x13dc4 [bcmai] (unreliable)
[c000000140cef8d0] [8000000005184c8c] .bsp_bcm_ioctl+0x984/0x9e4 [bcmai]
[c000000140cefbd0] [8000000005184d18] .bsp_bcm_unlockedIoCtl+0x2c/0x3c [bcmai]
[c000000140cefc40] [c00000000013c510] .vfs_ioctl+0x3c/0x74
[c000000140cefcb0] [c00000000013d180] .do_vfs_ioctl+0x6f4/0x74c
[c000000140cefd80] [c00000000013d234] .SyS_ioctl+0x5c/0xa4
[c000000140cefe30] [c000000000000598] syscall_exit+0x0/0x8c
Instruction dump:
48021541 e8410028 3d620000 e92b8010 81490000 7d4ae214 554a04fe 91490000
3c620000 e9438120 7bc92ea4 7d2a4a14
---[ end trace cf847998a4cea8d7 ]---
知道了出错的点,需要借助反汇编及addr2line定位到具体的出错代码,步骤如下:
1. 反汇编
/opt/ppc_gcc4.8.2_glibc2.18.0_multi/bin/ppc64_e5500-hardfloat-linux-gnu-objdump -S bcma.ko >bcma_objdump.txt
2. 由上面的oops可知,出错点在BCMX+0xa8fc
由反汇编可知BCMX函数地址是0x0000000000002544(此处地址是十六进制)
0x0000000000002544+0xa8fc = 0x000000000000CE40
0000000000002544 <.BCMX>: //函数起始地址(下文符号.BCMXGSControl与.BCMX是同一个函数:0xce10-0xa8cc=0x2544)
    2544: 7c 08 02 a6 mflr r0
    2548: f8 01 00 10 std r0,16(r1)
    254c: 7d 80 00 26 mfcr r12
    2550: fa a1 ff a8 std r21,-88(r1)
    2554: fa c1 ff b0 std r22,-80(r1)
    ...
    ce10: 48 00 00 01 bl ce10 <.BCMXGSControl+0xa8cc>
    ce14: 60 00 00 00 nop
    ce18: 3d 62 00 00 addis r11,r2,0
    ce1c: e9 2b 00 00 ld r9,0(r11)
    ce20: 81 49 00 00 lwz r10,0(r9)
    ce24: 7d 4a e2 14 add r10,r10,r28
    ce28: 55 4a 04 fe clrlwi r10,r10,19
    ce2c: 91 49 00 00 stw r10,0(r9)
    ce30: 3c 62 00 00 addis r3,r2,0
    ce34: e9 43 00 00 ld r10,0(r3)
    ce38: 7b c9 2e a4 rldicr r9,r30,5,58
    ce3c: 7d 2a 4a 14 add r9,r10,r9
    ce40: e9 29 00 08 ld r9,8(r9) //出错点
    ce44: 2f a9 00 00 cmpdi cr7,r9,0
    ce48: 40 fe 01 1c bne+ cr7,cf64 <.BCMXGSControl+0xaa20>
    ce4c: 38 61 03 18 addi r3,r1,792
    ce50: 48 00 00 01 bl ce50 <.BCMXGSControl+0xa90c>
    ce54: 60 00 00 00 nop
3. 由以上计算可知,出错的地址是0x000000000000CE40,对应的汇编代码是
ce40: e9 29 00 08 ld r9,8(r9)
但是汇编看起来还是比较晦涩的,可以借助addr2line转换成行号,方法是,
addr2line -e 带调试信息的可执行文件或模块文件(如.ko) 地址 -f
如下
[root@A20006253 hbx]# /opt/ppc_gcc4.8.2_glibc2.18.0_multi/bin/ppc64_e5500-hardfloat-linux-gnu-addr2line -e bcma.ko 000000000000CE40 -f
BCMX
/home/workspace/uppv2.1_dailybuild/code/xtn/bsp/source/cpus/linux/config-linux-3.10.55-cgel/T12/linux/drivers/bsp/source/boards//linux/../../../../so/sp/ethswitch/bcmx/ksource/bsp_bcm_api.c:2234
[root@A20006253 hbx]#
由此可知出错地方是在bsp_bcm_api.c文件的第2234行