Chinaunix首页 | 论坛 | 博客
  • 博客访问: 487749
  • 博文数量: 82
  • 博客积分: 3003
  • 博客等级: 中校
  • 技术积分: 1285
  • 用 户 组: 普通用户
  • 注册时间: 2007-09-11 15:27
文章分类

全部博文(82)

文章存档

2011年(1)

2010年(5)

2009年(63)

2008年(13)

我的朋友

分类: LINUX

2008-09-22 23:05:40

PDF下载:


文件:试验笔记:配置各种内核转储dump.pdf
大小:116KB
下载:下载

配置各种内核转储dump


撰写者信息:

Alin Fang (Fang Yunlin)

MSN:

G Talk:

Blog: http://www.alinblog.cn/


修改日期:

22 Sep, 2008

第一次修改



版权:

GNU


声明:

本人实验笔记,非权威文档。如有错误请告知作者。十分感谢!


实验笔记:


netdump


netdump需要两台机器配合。

如果client端死机,则会把内存里面的信息dump到server上。


client:

OS: Red Hat Enterprise Linux 4 update 7


server:

OS: Red Hat Enterprise Linux 4 update 7


client网络环境:

[root@station1 ~]# ifconfig

eth0 Link encap:Ethernet HWaddr 00:0C:29:22:61:B5

inet addr:192.168.0.1 Bcast:192.168.255.255 Mask:255.255.0.0

inet6 addr: fe80::20c:29ff:fe22:61b5/64 Scope:Link

UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1

RX packets:4 errors:0 dropped:0 overruns:0 frame:0

TX packets:9 errors:0 dropped:0 overruns:0 carrier:0

collisions:0 txqueuelen:1000

RX bytes:240 (240.0 b) TX bytes:546 (546.0 b)

Interrupt:185 Base address:0x2000


eth1 Link encap:Ethernet HWaddr 00:0C:29:22:61:BF

inet addr:10.66.0.194 Bcast:10.66.1.255 Mask:255.255.254.0

inet6 addr: fe80::20c:29ff:fe22:61bf/64 Scope:Link

UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1

RX packets:68 errors:0 dropped:0 overruns:0 frame:0

TX packets:57 errors:0 dropped:0 overruns:0 carrier:0

collisions:0 txqueuelen:1000

RX bytes:9962 (9.7 KiB) TX bytes:7759 (7.5 KiB)

Interrupt:177 Base address:0x2080


lo Link encap:Local Loopback

inet addr:127.0.0.1 Mask:255.0.0.0

inet6 addr: ::1/128 Scope:Host

UP LOOPBACK RUNNING MTU:16436 Metric:1

RX packets:8 errors:0 dropped:0 overruns:0 frame:0

TX packets:8 errors:0 dropped:0 overruns:0 carrier:0

collisions:0 txqueuelen:0

RX bytes:560 (560.0 b) TX bytes:560 (560.0 b)


[root@station1 ~]# route

Kernel IP routing table

Destination Gateway Genmask Flags Metric Ref Use Iface

10.66.0.0 * 255.255.254.0 U 0 0 0 eth1

169.254.0.0 * 255.255.0.0 U 0 0 0 eth1

192.168.0.0 * 255.255.0.0 U 0 0 0 eth0

default 10.66.1.254 0.0.0.0 UG 0 0 0 eth1

[root@station1 ~]#

server网络环境:

[root@station2 ~]# ifconfig

eth0 Link encap:Ethernet HWaddr 00:0C:29:32:64:A0

inet addr:192.168.0.2 Bcast:192.168.255.255 Mask:255.255.0.0

inet6 addr: fe80::20c:29ff:fe32:64a0/64 Scope:Link

UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1

RX packets:0 errors:0 dropped:0 overruns:0 frame:0

TX packets:9 errors:0 dropped:0 overruns:0 carrier:0

collisions:0 txqueuelen:1000

RX bytes:0 (0.0 b) TX bytes:546 (546.0 b)

Interrupt:185 Base address:0x2000


eth1 Link encap:Ethernet HWaddr 00:0C:29:32:64:AA

inet addr:10.66.0.114 Bcast:10.66.1.255 Mask:255.255.254.0

inet6 addr: fe80::20c:29ff:fe32:64aa/64 Scope:Link

UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1

RX packets:126 errors:0 dropped:0 overruns:0 frame:0

TX packets:55 errors:0 dropped:0 overruns:0 carrier:0

collisions:0 txqueuelen:1000

RX bytes:16122 (15.7 KiB) TX bytes:7651 (7.4 KiB)

Interrupt:177 Base address:0x2080


lo Link encap:Local Loopback

inet addr:127.0.0.1 Mask:255.0.0.0

inet6 addr: ::1/128 Scope:Host

UP LOOPBACK RUNNING MTU:16436 Metric:1

RX packets:8 errors:0 dropped:0 overruns:0 frame:0

TX packets:8 errors:0 dropped:0 overruns:0 carrier:0

collisions:0 txqueuelen:0

RX bytes:560 (560.0 b) TX bytes:560 (560.0 b)


[root@station2 ~]# route

Kernel IP routing table

Destination Gateway Genmask Flags Metric Ref Use Iface

10.66.0.0 * 255.255.254.0 U 0 0 0 eth1

169.254.0.0 * 255.255.0.0 U 0 0 0 eth1

192.168.0.0 * 255.255.0.0 U 0 0 0 eth0

default 10.66.1.254 0.0.0.0 UG 0 0 0 eth1

[root@station2 ~]#




配置server

[root@station2 ~]# rpm -q netdump-server

package netdump-server is not installed

[root@station2 ~]# mount /dev/cdrom /media/

mount: block device /dev/cdrom is write-protected, mounting read-only

[root@station2 ~]# cd /media/RedHat/RPMS/

[root@station2 RPMS]# rpm -ivh netdump-server-0.7.16-14.i386.rpm

warning: netdump-server-0.7.16-14.i386.rpm: V3 DSA signature: NOKEY, key ID db42a60e

Preparing... ########################################### [100%]

1:netdump-server ########################################### [100%]

[root@station2 RPMS]# service netdump-server

Usage: netdump-server {start|stop|status|restart|condrestart}

[root@station2 RPMS]# service netdump-server start

Starting netdump server: [ OK ]

[root@station2 RPMS]# chkconfig --level 35 netdump-server on

[root@station2 RPMS]# passwd netdump

Changing password for user netdump.

New UNIX password:

BAD PASSWORD: it is based on a dictionary word

Retype new UNIX password:

passwd: all authentication tokens updated successfully.

[root@station2 RPMS]#

安装netdump-server会在系统里面添加netdump这个用户,供netdump传输数据时使用。但是安装过程并不会给这个用户设置密码,所以这个用户默认是被锁住的。因此记得给netdump这个用户分配密码!



配置client

[root@station1 ~]# rpm -q netdump

netdump-0.7.16-14

[root@station1 ~]# vi /etc/sysconfig/netdump



指定netdump-server的IP地址

NETDUMPADDR=192.168.0.2





[root@station1 ~]# service netdump start

netdump@10.66.0.114's password:

initializing netdump [ OK ]

initializing netconsole [ OK ]


Message from syslogd@station1 at Mon Sep 22 17:47:38 2008 ...

station1 kernel: [...network console startup...]

[root@station1 ~]#



然后在client上模拟crash





[root@station1 ~]# echo 1 > /proc/sys/kernel/sysrq

[root@station1 ~]# echo c > /proc/sysrq-trigger

然后netdump就给激活了。

dump完数据,机器自动重启。



在server上检验数据

[root@station2 ~]# cd /var/crash/

[root@station2 crash]# ll

total 16

drwx------ 2 netdump netdump 4096 Sep 22 18:05 192.168.0.1-2008-09-22-18:04

drwx------ 2 netdump netdump 4096 Sep 22 17:48 magic

drwxr-xr-x 2 netdump netdump 4096 Aug 4 2007 scripts

[root@station2 crash]# cd 192.168.0.1-2008-09-22-18\:04/

[root@station2 192.168.0.1-2008-09-22-18:04]# ll

total 81656

-rw------- 1 netdump netdump 30592 Sep 22 18:05 log

-rw------- 1 netdump netdump 268439552 Sep 22 18:05 vmcore

[root@station2 192.168.0.1-2008-09-22-18:04]# ll -h

total 80M

-rw------- 1 netdump netdump 30K Sep 22 18:05 log

-rw------- 1 netdump netdump 257M Sep 22 18:05 vmcore

[root@station2 192.168.0.1-2008-09-22-18:04]#



vmcore就是发生crash的时候内存里的数据。





diskdump

diskdump在Red Hat Enterprise Linux 4 update 4之前需要一块单独的硬盘。

Red Hat Enterprise Linux 4 update 4以及之后,只需要一块单独的磁盘分区。

试验环境:

Red Hat Enterprise Linux 4 update 7

原本采用Red Hat Enterprise Linux 5 update 2,但是发现diskdump在RHEL5以及更高版本上被kdump所替代。

试验步骤

先划分一块分区给diskdump

[root@dhcp-0-084 ~]# fdisk /dev/sdb

Device contains neither a valid DOS partition table, nor Sun, SGI or OSF disklabel

Building a new DOS disklabel. Changes will remain in memory only,

until you decide to write them. After that, of course, the previous

content won't be recoverable.


Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)


Command (m for help): p


Disk /dev/sdb: 4294 MB, 4294967296 bytes

255 heads, 63 sectors/track, 522 cylinders

Units = cylinders of 16065 * 512 = 8225280 bytes


Device Boot Start End Blocks Id System


Command (m for help): n

Command action

e extended

p primary partition (1-4)

p

Partition number (1-4): 1

First cylinder (1-522, default 1):

Using default value 1

Last cylinder or +size or +sizeM or +sizeK (1-522, default 522):

Using default value 522


Command (m for help): w

The partition table has been altered!


Calling ioctl() to re-read partition table.

Syncing disks.

[root@dhcp-0-084 ~]# partprobe

Warning: Unable to open /dev/hdc read-write (Read-only file system). /dev/hdc has been opened read-only.


No changes made to disk, exiting partprobe.

[root@dhcp-0-084 ~]#



我们用/dev/sdb1作为diskdump的数据临时存放点。



编辑diskdump配置文件。



[root@dhcp-0-084 RPMS]# rpm -qa | grep diskdump

diskdumputils-1.4.1-5

[root@dhcp-0-084 RPMS]# vi /etc/sysconfig/diskdump




DEVICE=/dev/sdb1

初始化并启动diskdump服务。

[root@dhcp-0-084 ~]# service diskdump initialformat

Formatting dump device:

Do you want to format /dev/sdb1 (yes/NO)? yes

/dev/sdb1: [100.0%]

[root@dhcp-0-084 ~]# service diskdump restart

Starting diskdump: [ OK ]

[root@dhcp-0-084 ~]# chkconfig --level 35 diskdump on

[root@dhcp-0-084 ~]#





然后模拟死机。

[root@dhcp-0-084 RPMS]# echo 1 > /proc/sys/kernel/sysrq

[root@dhcp-0-084 RPMS]# echo c > /proc/sysrq-trigger



此时diskdump开始工作。把内存中的数据dump到/dev/sdb1中。

diskdump之后必须手动重启机器。

重启机器后,diskdump会把/dev/sdb1里面的数据拷到/var/crash文件夹下。





检查diskdump数据。


[root@dhcp-0-084 ~]# cd /var/crash/

[root@dhcp-0-084 crash]# ll

total 12

drwx------ 2 root root 4096 Sep 22 19:52 127.0.0.1-2008-09-22-19:49

drwxr-xr-x 2 netdump netdump 4096 Mar 25 23:33 scripts

[root@dhcp-0-084 crash]# cd 127.0.0.1-2008-09-22-19\:49/

[root@dhcp-0-084 127.0.0.1-2008-09-22-19:49]# ll -h

total 86M

-rw------- 1 root root 513M Sep 22 19:52 vmcore

[root@dhcp-0-084 127.0.0.1-2008-09-22-19:49]#





kdump

实验环境:

client OS: Red Hat Enterprise Linux 5 update 2

client IP: 10.66.0.157

server OS: Red Hat Enterprise Linux 4 update 7

server IP: 10.66.0.84

kdump属于kexec-tools-1.102pre-21.el5这个包。一般装系统都有装上。

kdump支持N种方式存储内存数据,包括裸设备,文件系统,nfs和ssh。并且能设定dump前和dump后运行脚本以及执行其他动作。正所谓很好很强大。

kdump的dump机制是:预先生成一个crashkernel,在内核crash的时候,激活这个crashkernel,用这个crashkernel载入的小型系统dump处于crash状态的内核。

实验步骤:

这次我会配置kdump把内存数据scp到备份服务器上。

client上:


安装kdump

[root@dhcp-0-157 ~]# cat /etc/redhat-release

Red Hat Enterprise Linux Server release 5.2 (Tikanga)

[root@dhcp-0-157 ~]# cd /misc/cd/Server

[root@dhcp-0-157 Server]# rpm -ivh busybox-1.2.0-4.el5.i386.rpm

warning: busybox-1.2.0-4.el5.i386.rpm: Header V3 DSA signature: NOKEY, key ID 37017186

Preparing... ########################################### [100%]

1:busybox ########################################### [100%]

[root@dhcp-0-157 Server]# rpm -ivh kexec-tools-1.102pre-21.el5.i386.rpm

warning: kexec-tools-1.102pre-21.el5.i386.rpm: Header V3 DSA signature: NOKEY, key ID 37017186

Preparing... ########################################### [100%]

1:kexec-tools ########################################### [100%]

[root@dhcp-0-157 Server]#




配置kdump

[root@dhcp-0-157 ~]# vim /etc/kdump.conf

net root@10.66.0.84



为kernel开启kdump支持

[root@dhcp-0-157 ~]# vim /boot/grub/grub.conf

在kernel选项上添加crashkernel参数。

参数格式是:

crashkernel=nn[KMG]@ss[KMG]

nn表示要为crashkernel预留多少内存

ss表示为crashkernel预留内存的起始位置



default=0

timeout=5

splashimage=(hd0,0)/grub/splash.xpm.gz

hiddenmenu

title Red Hat Enterprise Linux Server (2.6.18-92.el5)

root (hd0,0)

kernel /vmlinuz-2.6.18-92.el5 ro root=LABEL=/ crashkernel=256M@16M

initrd /initrd-2.6.18-92.el5.img


重启电脑使新参数生效。



然后启用kdump服务

[root@dhcp-0-157 ~]# service kdump

Usage: /etc/init.d/kdump {start|stop|status|restart|propagate}

[root@dhcp-0-157 ~]# service kdump propagate

Generating new ssh keys... done.

The authenticity of host '10.66.0.84 (10.66.0.84)' can't be established.

RSA key fingerprint is 31:c2:d8:b6:eb:2e:03:64:cd:ba:56:e9:49:6e:5d:6c.

Are you sure you want to continue connecting (yes/no)? yes

Warning: Permanently added '10.66.0.84' (RSA) to the list of known hosts.

root@10.66.0.84's password:

/root/.ssh/kdump_id_rsa.pub has been added to ~root/.ssh/authorized_keys2 on 10.66.0.84

[root@dhcp-0-157 ~]# service kdump restart

Stopping kdump:[ OK ]

No kdump initial ramdisk found.[WARNING]

Rebuilding /boot/initrd-2.6.18-92.el5kdump.img

Starting kdump:[ OK ]

[root@dhcp-0-157 ~]# chkconfig --level 35 kdump on

[root@dhcp-0-157 ~]#



最后模拟crash

[root@dhcp-0-157 ~]# echo 1 > /proc/sys/kernel/sysrq

[root@dhcp-0-157 ~]# echo c > /proc/sysrq-trigger



在server的/var/crash下可以看到由client转储过来的内核数据。

另外,kdump在完成内核转储后会自动重启。


阅读(2124) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~