情况1. RHEL5.3_x86 安装时配置了kdump
配置: # yum install kexec-tools
# vi /etc/kdump.conf
path /var/crash //取消此行的注释(去掉行首的#)
# vi /etc/grub.conf
检查kernel行是否有crashkernel参数,例如 crashkernel=128M@16M(如果没有,添加后需要重启才能生效)
# chkconfig kdump on
# service kdump start
然后在client上模拟crash
# echo 1 >/proc/sys/kernel/sysrq
# echo c >/proc/sysrq-trigger
#然后检查 /var/crash/目录
情况2:RHEL5.3_x86 安装时没有配置kdump,改为自己手动启动
# service kdump start
No kdump initial ramdisk found.
# system-config-kdump
以下是转载的文章
实验笔记:
netdump
netdump需要两台机器配合。
如果client端死机,则会把内存里面的信息dump到server。
client:
OS: Red Hat Enterprise Linux 4 update 7
server:
OS: Red Hat Enterprise Linux 4 update 7
client网络环境:
[root@station1~]# ifconfig
eth0 Link encap:Ethernet HWaddr 00:0C:29:22:61:B5
inet addr:192.168.0.1 Bcast:192.168.255.255 Mask:255.255.0.0
inet6 addr: fe80::20c:29ff:fe22:61b5/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:4 errors:0 dropped:0 overruns:0 frame:0
TX packets:9 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:240 (240.0 b) TX bytes:546 (546.0 b)
Interrupt:185 Base address:0x2000
eth1 Link encap:Ethernet HWaddr 00:0C:29:22:61:BF
inet addr:10.66.0.194 Bcast:10.66.1.255 Mask:255.255.254.0
inet6 addr: fe80::20c:29ff:fe22:61bf/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:68 errors:0 dropped:0 overruns:0 frame:0
TX packets:57 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:9962 (9.7 KiB) TX bytes:7759 (7.5 KiB)
Interrupt:177 Base address:0x2080
lo Link encap:Local Loopback
inet addr:127.0.0.1 Mask:255.0.0.0
inet6 addr: ::1/128 Scope:Host
UP LOOPBACK RUNNING MTU:16436 Metric:1
RX packets:8 errors:0 dropped:0 overruns:0 frame:0
TX packets:8 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:560 (560.0 b) TX bytes:560 (560.0 b)
[root@station1~]# route
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
10.66.0.0 * 255.255.254.0 U 0 0 0 eth1
169.254.0.0 * 255.255.0.0 U 0 0 0 eth1
192.168.0.0 * 255.255.0.0 U 0 0 0 eth0
default 10.66.1.254 0.0.0.0 UG 0 0 0 eth1
[root@station1~]#
server网络环境:
[root@station2~]# ifconfig
eth0 Link encap:Ethernet HWaddr 00:0C:29:32:64:A0
inet addr:192.168.0.2 Bcast:192.168.255.255 Mask:255.255.0.0
inet6 addr: fe80::20c:29ff:fe32:64a0/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:0 errors:0 dropped:0 overruns:0 frame:0
TX packets:9 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:0 (0.0 b) TX bytes:546 (546.0 b)
Interrupt:185 Base address:0x2000
eth1 Link encap:Ethernet HWaddr 00:0C:29:32:64:AA
inet addr:10.66.0.114 Bcast:10.66.1.255 Mask:255.255.254.0
inet6 addr: fe80::20c:29ff:fe32:64aa/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:126 errors:0 dropped:0 overruns:0 frame:0
TX packets:55 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:16122 (15.7 KiB) TX bytes:7651 (7.4 KiB)
Interrupt:177 Base address:0x2080
lo Link encap:Local Loopback
inet addr:127.0.0.1 Mask:255.0.0.0
inet6 addr: ::1/128 Scope:Host
UP LOOPBACK RUNNING MTU:16436 Metric:1
RX packets:8 errors:0 dropped:0 overruns:0 frame:0
TX packets:8 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:560 (560.0 b) TX bytes:560 (560.0 b)
[root@station2~]# route
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
10.66.0.0 * 255.255.254.0 U 0 0 0 eth1
169.254.0.0 * 255.255.0.0 U 0 0 0 eth1
192.168.0.0 * 255.255.0.0 U 0 0 0 eth0
default 10.66.1.254 0.0.0.0 UG 0 0 0 eth1
[root@station2~]#
配置server
[root@station2~]# rpm -q netdump-server
package netdump-server is not installed
[root@station2~]# mount /dev/cdrom /media/
mount: block device /dev/cdrom is write-protected, mounting read-only
[root@station2~]# cd /media/RedHat/RPMS/
[root@station2 RPMS]# rpm -ivh netdump-server-0.7.16-14.i386.rpm
warning: netdump-server-0.7.16-14.i386.rpm: V3 DSA signature: NOKEY, key ID db42a60e
Preparing... ########################################### [100%]
1:netdump-server ########################################### [100%]
[root@station2 RPMS]# service netdump-server
Usage: netdump-server {start|stop|status|restart|condrestart}
[root@station2 RPMS]# service netdump-server start
Starting netdump server: [ OK ]
[root@station2 RPMS]# chkconfig --level 35 netdump-server on
[root@station2 RPMS]# passwd netdump
Changing password for user netdump.
New UNIX password:
BAD PASSWORD: it is based on a dictionary word
Retype new UNIX password:
passwd: all authentication tokens updated successfully.
[root@station2 RPMS]#
安装netdump-server时会在系统里添加netdump这个用户,供netdump传输数据时使用。但安装过程并不会为这个用户设置密码,所以该用户默认是被锁定的。因此记得给netdump这个用户设置密码!
配置client
[root@station1~]# rpm -q netdump
netdump-0.7.16-14
[root@station1~]# vi /etc/sysconfig/netdump
指定netdump-server的IP地址
NETDUMPADDR=192.168.0.2
[root@station1~]# service netdump start
password:
initializing netdump [ OK ]
initializing netconsole [ OK ]
Message from syslogd@station1 at Mon Sep 22 17:47:38 2008 ...
station1 kernel: [...network console startup...]
[root@station1~]#
然后在client上模拟crash
[root@station1~]# echo 1 >/proc/sys/kernel/sysrq
[root@station1~]# echo c >/proc/sysrq-trigger
然后netdump就被激活了。
当dump完数据,机器自动重启。
在server上检验数据
[root@station2~]# cd /var/crash/
[root@station2 crash]# ll
total 16
drwx------ 2 netdump netdump 4096 Sep 22 18:05 192.168.0.1-2008-09-22-18:04
drwx------ 2 netdump netdump 4096 Sep 22 17:48 magic
drwxr-xr-x 2 netdump netdump 4096 Aug 4 2007 scripts
[root@station2 crash]# cd 192.168.0.1-2008-09-22-18\:04/
[root@station2 192.168.0.1-2008-09-22-18:04]# ll
total 81656
-rw------- 1 netdump netdump 30592 Sep 22 18:05 log
-rw------- 1 netdump netdump 268439552 Sep 22 18:05 vmcore
[root@station2 192.168.0.1-2008-09-22-18:04]# ll -h
total 80M
-rw------- 1 netdump netdump 30K Sep 22 18:05 log
-rw------- 1 netdump netdump 257M Sep 22 18:05 vmcore
[root@station2 192.168.0.1-2008-09-22-18:04]#
vmcore就是发生crash的时候内存里的数据。
diskdump
diskdump在Red Hat Enterprise Linux 4 update 4之前需要一块单独的硬盘。
在Red Hat Enterprise Linux 4 update 4以及之后,只需要一块单独的磁盘分区。
试验环境:
Red Hat Enterprise Linux 4 update 7
原本采用Red Hat Enterprise Linux 5 update 2,但是发现diskdump在RHEL5以及更高版本上被kdump所替代。
试验步骤
先划分一块分区给diskdump。
[root@dhcp-0-084~]# fdisk /dev/sdb
Device contains neither a valid DOS partition table, nor Sun, SGI or OSF disklabel
Building a new DOS disklabel. Changes will remain in memory only,
until you decide to write them. After that, of course, the previous
content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): p
Disk /dev/sdb: 4294 MB, 4294967296 bytes
255 heads, 63 sectors/track, 522 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes
Device Boot Start End Blocks Id System
Command (m for help): n
Command action
e extended
p primary partition (1-4)
p
Partition number (1-4): 1
First cylinder (1-522, default 1):
Using default value 1
Last cylinder or +size or +sizeM or +sizeK (1-522, default 522):
Using default value 522
Command (m for help): w
The partition table has been altered!
Calling ioctl() to re-read partition table.
Syncing disks.
[root@dhcp-0-084 ~]# partprobe
Warning: Unable to open /dev/hdc read-write (Read-only file system). /dev/hdc has been opened read-only.
No changes made to disk, exiting partprobe.
[root@dhcp-0-084~]#
我们用/dev/sdb1作为diskdump的数据临时存放点。
编辑diskdump配置文件。
[root@dhcp-0-084 RPMS]# rpm -qa | grep diskdump
diskdumputils-1.4.1-5
[root@dhcp-0-084 RPMS]# vi /etc/sysconfig/diskdump
DEVICE=/dev/sdb1
初始化并启动diskdump服务。
[root@dhcp-0-084~]# service diskdump initialformat
Formatting dump device:
Do you want to format /dev/sdb1 (yes/NO)? yes
/dev/sdb1:[100.0%]
[root@dhcp-0-084~]# service diskdump restart
Starting diskdump: [ OK ]
[root@dhcp-0-084~]# chkconfig --level 35 diskdump on
[root@dhcp-0-084~]#
然后模拟死机。
[root@dhcp-0-084 RPMS]# echo 1 >/proc/sys/kernel/sysrq
[root@dhcp-0-084 RPMS]# echo c >/proc/sysrq-trigger
此时diskdump开始工作。把内存中的数据dump到/dev/sdb1中。
diskdump之后必须手动重启机器。
重启机器后,diskdump会把/dev/sdb1里面的数据拷到/var/crash文件夹下。
检查diskdump数据。
[root@dhcp-0-084~]# cd /var/crash/
[root@dhcp-0-084 crash]# ll
total 12
drwx------ 2 root root 4096 Sep 22 19:52 127.0.0.1-2008-09-22-19:49
drwxr-xr-x 2 netdump netdump 4096 Mar 25 23:33 scripts
[root@dhcp-0-084 crash]# cd 127.0.0.1-2008-09-22-19\:49/
[root@dhcp-0-084 127.0.0.1-2008-09-22-19:49]# ll -h
total 86M
-rw------- 1 root root 513M Sep 22 19:52 vmcore
[root@dhcp-0-084 127.0.0.1-2008-09-22-19:49]#
kdump
实验环境:
client OS: Red Hat Enterprise Linux 5 update 2
client IP: 10.66.0.157
server OS: Red Hat Enterprise Linux 4 update 7
server IP: 10.66.0.84
kdump属于kexec-tools-1.102pre-21.el5这个包。一般装系统都有装上。
kdump支持N种方式存储内存数据,包括裸设备,文件系统,nfs,ssh。并且能设定dump前和dump后运行脚本以及执行其他动作。正所谓很好很强大。
kdump的dump机制是:预先生成一个crashkernel,在内核crash的时候,激活这个crashkernel,用这个crashkernel载入的小型系统dump处于crash状态的内核。
实验步骤:
这次我会配置kdump把内存数据scp到备份服务器上。
在client上:
安装kdump
[root@dhcp-0-157~]# cat /etc/redhat-release
Red Hat Enterprise Linux Server release 5.2 (Tikanga)
[root@dhcp-0-157~]# cd /misc/cd/Server
[root@dhcp-0-157 Server]# rpm -ivh busybox-1.2.0-4.el5.i386.rpm
warning: busybox-1.2.0-4.el5.i386.rpm: Header V3 DSA signature: NOKEY, key ID 37017186
Preparing... ########################################### [100%]
1:busybox ########################################### [100%]
[root@dhcp-0-157 Server]# rpm -ivh kexec-tools-1.102pre-21.el5.i386.rpm
warning: kexec-tools-1.102pre-21.el5.i386.rpm: Header V3 DSA signature: NOKEY, key ID 37017186
Preparing... ########################################### [100%]
1:kexec-tools ########################################### [100%]
[root@dhcp-0-157 Server]#
配置kdump
[root@dhcp-0-157 ~]# vim /etc/kdump.conf
加入如下一行,指定通过ssh/scp把vmcore转储到server(10.66.0.84)上:
net root@10.66.0.84
对kernel开启kdump支持
[root@dhcp-0-157~]# vim /boot/grub/grub.conf
在kernel选项上添加crashkernel参数。
参数格式是:
crashkernel=nn[KMG]@ss[KMG]
nn表示要为crashkernel预留多少内存
ss表示为crashkernel预留内存的起始位置
default=0
timeout=5
splashimage=(hd0,0)/grub/splash.xpm.gz
hiddenmenu
title Red Hat Enterprise Linux Server (2.6.18-92.el5)
root (hd0,0)
kernel /vmlinuz-2.6.18-92.el5 ro root=LABEL=/ crashkernel=128M@16M
initrd /initrd-2.6.18-92.el5.img
重启电脑使新参数生效。
然后启用kdump服务
[root@dhcp-0-157~]# service kdump
Usage: /etc/init.d/kdump {start|stop|status|restart|propagate}
[root@dhcp-0-157~]# service kdump propagate
Generating new ssh keys... done.
The authenticity of host '10.66.0.84 (10.66.0.84)' can't be established.
RSA key fingerprint is 31:c2:d8:b6:eb:2e:03:64:cd:ba:56:e9:49:6e:5d:6c.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added '10.66.0.84' (RSA) to the list of known hosts.
password:
/root/.ssh/kdump_id_rsa.pub has been added to ~root/.ssh/authorized_keys2 on 10.66.0.84
[root@dhcp-0-157~]# service kdump restart
Stopping kdump: [ OK ]
No kdump initial ramdisk found. [WARNING]
Rebuilding /boot/initrd-2.6.18-92.el5kdump.img
Starting kdump: [ OK ]
[root@dhcp-0-157~]# chkconfig --level 35 kdump on
[root@dhcp-0-157~]#
最后模拟crash:
[root@dhcp-0-157 ~]# echo 1 >/proc/sys/kernel/sysrq
[root@dhcp-0-157 ~]# echo c >/proc/sysrq-trigger
在server的/var/crash下可以看到由client转储过来的内核数据。
另外,kdump在完成内核转储后会自动重启。
阅读(2000) | 评论(0) | 转发(0) |