Chinaunix首页 | 论坛 | 博客
  • 博客访问: 33310
  • 博文数量: 9
  • 博客积分: 265
  • 博客等级: 二等列兵
  • 技术积分: 100
  • 用 户 组: 普通用户
  • 注册时间: 2010-05-26 16:20
文章分类
文章存档

2020年(1)

2017年(2)

2016年(1)

2010年(5)

我的朋友

分类: LINUX

2016-11-25 14:06:44

经常会遇到 kernel panic 的情况,熟练掌握如何使用 crash 分析 vmcore,可以提升工作效率。


一、依赖包的安装
yum install -y crash
debuginfo-install kernel

二、分析过程
1. crash vmcore /usr/lib/debug/lib/modules/3.10.0-514.el7.x86_64/vmlinux

[16280.734352] BUG: unable to handle kernel NULL pointer dereference at 00000000000000b8
[16280.735299] IP: [<ffffffffa09b09e9>] raid10_sync_request+0x579/0x1a90 [raid10]
[16280.735299] PGD 0
[16280.735299] Oops: 0000 [#1] SMP
[16280.735299] Modules linked in: raid10 ext4 mbcache jbd2 raid1 loop nfsv3 rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec coretemp snd_hda_core iTCO_wdt hp_wmi ppdev sg sparse_keymap gpio_ich iTCO_vendor_support snd_hwdep snd_seq kvm i5000_edac edac_core rfkill snd_seq_device snd_pcm irqbypass pcspkr lpc_ich snd_timer i5k_amb snd parport_pc shpchp parport soundcore dm_multipath nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic sr_mod cdrom crct10dif_common ata_generic pata_acpi nouveau video mxm_wmi drm_kms_helper syscopyarea sysfillrect ahci sysimgblt ata_piix fb_sys_fops libahci ttm libata serio_raw igb drm mptsas dca scsi_transport_sas tg3 i2c_algo_bit mptscsih i2c_core ptp
[16280.735299]  mptbase pps_core wmi floppy fjes dm_mirror dm_region_hash dm_log dm_mod
[16280.735299] CPU: 3 PID: 26188 Comm: md0_resync Not tainted 3.10.0-514.el7.x86_64 #1
[16280.735299] Hardware name: Hewlett-Packard HP xw6400 Workstation/0A04h, BIOS 786D4 v02.38 10/25/2010
[16280.735299] task: ffff88003559bec0 ti: ffff8800591e8000 task.ti: ffff8800591e8000
[16280.735299] RIP: 0010:[<ffffffffa09b09e9>]  [<ffffffffa09b09e9>] raid10_sync_request+0x579/0x1a90 [raid10]
[16280.735299] RSP: 0018:ffff8800591ebbc8  EFLAGS: 00010202
[16280.735299] RAX: 00000000002b3780 RBX: ffff88006f8e0b00 RCX: 0000000000000002
[16280.735299] RDX: ffff880053665860 RSI: ffff88006fb58800 RDI: 00000000002b3380
[16280.735299] RBP: ffff8800591ebca0 R08: 0000000000000000 R09: 0000000000000007
[16280.735299] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[16280.735299] R13: ffff880053665840 R14: 0000000000000000 R15: ffff880053fbd400
[16280.735299] FS:  0000000000000000(0000) GS:ffff88007fcc0000(0000) knlGS:0000000000000000
[16280.735299] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[16280.735299] CR2: 00000000000000b8 CR3: 0000000078f19000 CR4: 00000000000007e0
[16280.735299] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[16280.735299] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[16280.735299] Stack:
[16280.735299]  ffff8800591ebd14 00000000000003ff 0000000000000000 00000000810d2f4f
[16280.735299]  00000000002b3800 0000000000000000 0000000000000000 ffff880000000007
[16280.735299]  ffff88006fb58800 0000000000000000 00000000002b3780 ffff880000000080
[16280.735299] Call Trace:
[16280.735299]  [] ? sysfs_notify_dirent+0x50/0x60
[16280.735299]  [<ffffffff814fe94b>] md_do_sync+0xabb/0x1000
[16280.735299]  [] ? wake_up_atomic_t+0x30/0x30
[16280.735299]  [<ffffffff814fab65>] md_thread+0x155/0x1a0
[16280.735299]  [] ? find_pers+0x80/0x80
[16280.735299]  [<ffffffff810b052f>] kthread+0xcf/0xe0
[16280.735299]  [] ? end_buffer_async_read+0x130/0x130
[16280.735299]  [] ? kthread_create_on_node+0x140/0x140
[16280.735299]  [<ffffffff81696418>] ret_from_fork+0x58/0x90
[16280.735299]  [] ? kthread_create_on_node+0x140/0x140
[16280.735299] Code: ff 31 c0 0f 1f 80 00 00 00 00 48 98 48 c1 e0 05 49 8b 44 05 58 45 89 4d 60 49 89 7d 58 45 89 a5 80 00 00 00 49 89 45 78 4c 8b 33 <49> 8b 96 b8 00 00 00 83 e2 02 0f 84 0f 02 00 00 49 8b 55 68 48
[16280.735299] RIP  [<ffffffffa09b09e9>] raid10_sync_request+0x579/0x1a90 [raid10]
[16280.735299]  RSP <ffff8800591ebbc8>
[16280.735299] CR2: 00000000000000b8
2. set 命令
crash> set 26188
    PID: 26188
COMMAND: "md0_resync"
   TASK: ffff88003559bec0  [THREAD_INFO: ffff8800591e8000]
    CPU: 3
  STATE: TASK_RUNNING (PANIC)
3. bt命令
crash> bt
PID: 26188  TASK: ffff88003559bec0  CPU: 3   COMMAND: "md0_resync"
 #0 [ffff8800591eb868] machine_kexec at ffffffff81059cdb
 #1 [ffff8800591eb8c8] __crash_kexec at ffffffff81105182
 #2 [ffff8800591eb998] crash_kexec at ffffffff81105270
 #3 [ffff8800591eb9b0] oops_end at ffffffff8168ed88
 #4 [ffff8800591eb9d8] no_context at ffffffff8167e993
 #5 [ffff8800591eba28] __bad_area_nosemaphore at ffffffff8167ea29
 #6 [ffff8800591eba70] bad_area_nosemaphore at ffffffff8167eb93
 #7 [ffff8800591eba80] __do_page_fault at ffffffff81691b1e
 #8 [ffff8800591ebae0] do_page_fault at ffffffff81691cc5
 #9 [ffff8800591ebb10] page_fault at ffffffff8168df88
    [exception RIP: raid10_sync_request+1401]
    RIP: ffffffffa09b09e9  RSP: ffff8800591ebbc8  RFLAGS: 00010202                    #RIP 是函数中panic时的地址
    RAX: 00000000002b3780  RBX: ffff88006f8e0b00  RCX: 0000000000000002
    RDX: ffff880053665860  RSI: ffff88006fb58800  RDI: 00000000002b3380
    RBP: ffff8800591ebca0   R8: 0000000000000000   R9: 0000000000000007
    R10: 0000000000000000  R11: 0000000000000000  R12: 0000000000000000
    R13: ffff880053665840  R14: 0000000000000000  R15: ffff880053fbd400
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
#10 [ffff8800591ebca8] md_do_sync at ffffffff814fe94b
#11 [ffff8800591ebe50] md_thread at ffffffff814fab65
#12 [ffff8800591ebec8] kthread at ffffffff810b052f
#13 [ffff8800591ebf50] ret_from_fork at ffffffff81696418
4. 反汇编
crash> mod -s raid10
     MODULE       NAME                      SIZE  OBJECT FILE
ffffffffa09b62e0  raid10                   48005  /lib/modules/3.10.0-514.el7.x86_64/kernel/drivers/md/raid10.ko
crash> dis -l raid10_sync_request | grep -C 6 ffffffffa09b09e9
0xffffffffa09b09db <raid10_sync_request+1387>:    mov    %r12d,0x80(%r13)
/usr/src/debug/kernel-3.10.0-514.el7/linux-3.10.0-514.el7.x86_64/drivers/md/raid10.c: 3204
0xffffffffa09b09e2 <raid10_sync_request+1394>:    mov    %rax,0x78(%r13)
/usr/src/debug/kernel-3.10.0-514.el7/linux-3.10.0-514.el7.x86_64/drivers/md/raid10.c: 3206
0xffffffffa09b09e6 <raid10_sync_request+1398>:    mov    (%rbx),%r14
/usr/src/debug/kernel-3.10.0-514.el7/linux-3.10.0-514.el7.x86_64/arch/x86/include/asm/bitops.h: 319
0xffffffffa09b09e9 <raid10_sync_request+1401>:    mov    0xb8(%r14),%rdx
/usr/src/debug/kernel-3.10.0-514.el7/linux-3.10.0-514.el7.x86_64/drivers/md/raid10.c: 3207
0xffffffffa09b09f0 <raid10_sync_request+1408>:    and    $0x2,%edx
0xffffffffa09b09f3 <raid10_sync_request+1411>:    je     0xffffffffa09b0c08 <raid10_sync_request+1944>
/usr/src/debug/kernel-3.10.0-514.el7/linux-3.10.0-514.el7.x86_64/drivers/md/raid10.c: 3220
0xffffffffa09b09f9 <raid10_sync_request+1417>:    mov    0x68(%r13),%rdx
/usr/src/debug/kernel-3.10.0-514.el7/linux-3.10.0-514.el7.x86_64/drivers/md/raid10.c: 3186

发现出错的指令对应一个位操作(test_bit),位于 raid10.c 的第 3207 行:
3206                                 rdev = mirror->rdev;
3207                                 if (!test_bit(In_sync, &rdev->flags)) {

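结合寄存器信息可以确认:3206 行对应的 mov (%rbx),%r14 把 mirror->rdev 读入 R14,而 panic 时 R14 为 0;紧接着的 mov 0xb8(%r14),%rdx 读取 rdev->flags(偏移 0xb8),访问的地址正是 CR2 中的 00000000000000b8。因此可以知道是 mirror->rdev 为空指针。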





阅读(3407) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~