Chinaunix首页 | 论坛 | 博客
  • 博客访问: 80914
  • 博文数量: 23
  • 博客积分: 0
  • 博客等级: 民兵
  • 技术积分: 12
  • 用 户 组: 普通用户
  • 注册时间: 2014-06-27 10:32
文章分类
文章存档

2016年(1)

2015年(8)

2014年(14)

我的朋友

分类: LINUX

2014-10-14 10:45:41

原文地址:Linux 配置 kdump 作者:unixthisyear

情况1. RHEL5.3_x86 安装时配置了kdump
配置: # yum install kexec-tools
       # vi /etc/kdump.conf
         path /var/crash //注销此行

       # vi /etc/grub.conf
         检查kernel行是否有crashkernel参数(例如 crashkernel=128M@16M)
       # chkconfig kdump on
       # service kdump start
       然后在client上模拟crash
       # echo 1 >/proc/sys/kernel/sysrq
       # echo c >/proc/sysrq-trigger
       #然后检查 /var/crash/目录
 
情况2:RHEL5.3_x86 安装时没有配置kdump,改为自己手动启动
 
 # service kdump start
 No kdump initial ramdisk found.
 # system-config-kdump
 
以下是转载的文章
实验笔记:
  netdump
  netdump需要两台机器配合。
  如果client端死机,则会把内存里面的信息dump到server。
  client:
  OS: Red Hat Enterprise Linux 4 update 7
  server:
  OS: Red Hat Enterprise Linux 4 update 7
  client网络环境:
  [root@station1~]# ifconfig
  eth0 Link encap:Ethernet HWaddr 00:0C:29:22:61:B5
  inet addr:192.168.0.1 Bcast:192.168.255.255 Mask:255.255.0.0
  inet6 addr: fe80::20c:29ff:fe22:61b5/64 Scope:Link
  UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
  RX packets:4 errors:0 dropped:0 overruns:0 frame:0
  TX packets:9 errors:0 dropped:0 overruns:0 carrier:0
  collisions:0 txqueuelen:1000
  RX bytes:240 (240.0 b) TX bytes:546 (546.0 b)
  Interrupt:185 Base address:0x2000
  eth1 Link encap:Ethernet HWaddr 00:0C:29:22:61:BF
  inet addr:10.66.0.194 Bcast:10.66.1.255 Mask:255.255.254.0
  inet6 addr: fe80::20c:29ff:fe22:61bf/64 Scope:Link
  UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
  RX packets:68 errors:0 dropped:0 overruns:0 frame:0
  TX packets:57 errors:0 dropped:0 overruns:0 carrier:0
  collisions:0 txqueuelen:1000
  RX bytes:9962 (9.7 KiB) TX bytes:7759 (7.5 KiB)
  Interrupt:177 Base address:0x2080
  lo Link encap:Local Loopback
  inet addr:127.0.0.1 Mask:255.0.0.0
  inet6 addr: ::1/128 Scope:Host
  UP LOOPBACK RUNNING MTU:16436 Metric:1
  RX packets:8 errors:0 dropped:0 overruns:0 frame:0
  TX packets:8 errors:0 dropped:0 overruns:0 carrier:0
  collisions:0 txqueuelen:0
  RX bytes:560 (560.0 b) TX bytes:560 (560.0 b)
  [root@station1~]# route
  Kernel IP routing table
  Destination Gateway Genmask Flags Metric Ref Use Iface
  10.66.0.0 * 255.255.254.0 U 0 0 0 eth1
  169.254.0.0 * 255.255.0.0 U 0 0 0 eth1
  192.168.0.0 * 255.255.0.0 U 0 0 0 eth0
  default 10.66.1.254 0.0.0.0 UG 0 0 0 eth1
  [root@station1~]#
  server网络环境:
  [root@station2~]# ifconfig
  eth0 Link encap:Ethernet HWaddr 00:0C:29:32:64:A0
  inet addr:192.168.0.2 Bcast:192.168.255.255 Mask:255.255.0.0
  inet6 addr: fe80::20c:29ff:fe32:64a0/64 Scope:Link
  UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
  RX packets:0 errors:0 dropped:0 overruns:0 frame:0
  TX packets:9 errors:0 dropped:0 overruns:0 carrier:0
  collisions:0 txqueuelen:1000
  RX bytes:0 (0.0 b) TX bytes:546 (546.0 b)
  Interrupt:185 Base address:0x2000
  eth1 Link encap:Ethernet HWaddr 00:0C:29:32:64:AA
  inet addr:10.66.0.114 Bcast:10.66.1.255 Mask:255.255.254.0
  inet6 addr: fe80::20c:29ff:fe32:64aa/64 Scope:Link
  UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
  RX packets:126 errors:0 dropped:0 overruns:0 frame:0
  TX packets:55 errors:0 dropped:0 overruns:0 carrier:0
  collisions:0 txqueuelen:1000
  RX bytes:16122 (15.7 KiB) TX bytes:7651 (7.4 KiB)
  Interrupt:177 Base address:0x2080
  lo Link encap:Local Loopback
  inet addr:127.0.0.1 Mask:255.0.0.0
  inet6 addr: ::1/128 Scope:Host
  UP LOOPBACK RUNNING MTU:16436 Metric:1
  RX packets:8 errors:0 dropped:0 overruns:0 frame:0
  TX packets:8 errors:0 dropped:0 overruns:0 carrier:0
  collisions:0 txqueuelen:0
  RX bytes:560 (560.0 b) TX bytes:560 (560.0 b)
  [root@station2~]# route
  Kernel IP routing table
  Destination Gateway Genmask Flags Metric Ref Use Iface
  10.66.0.0 * 255.255.254.0 U 0 0 0 eth1
  169.254.0.0 * 255.255.0.0 U 0 0 0 eth1
  192.168.0.0 * 255.255.0.0 U 0 0 0 eth0
  default 10.66.1.254 0.0.0.0 UG 0 0 0 eth1
  [root@station2~]#
  配置server
  [root@station2~]# rpm -q netdump-server
  package netdump-server is not installed
  [root@station2~]# mount /dev/cdrom /media/
  mount: block device /dev/cdrom is write-protected, mounting read-only
  [root@station2~]# cd /media/RedHat/RPMS/
  [root@station2 RPMS]# rpm -ivh netdump-server-0.7.16-14.i386.rpm
  warning: netdump-server-0.7.16-14.i386.rpm: V3 DSA signature: NOKEY, key ID db42a60e
  Preparing... ########################################### [100%]
  1:netdump-server ########################################### [100%]
  [root@station2 RPMS]# service netdump-server
  Usage: netdump-server {start|stop|status|restart|condrestart}
  [root@station2 RPMS]# service netdump-server start
  Starting netdump server: [ OK ]
  [root@station2 RPMS]# chkconfig --level 35 netdump-server on
  [root@station2 RPMS]# passwd netdump
  Changing password for user netdump.
  New UNIX password:
  BAD PASSWORD: it is based on a dictionary word
  Retype new UNIX password:
  passwd: all authentication tokens updated successfully.
  [root@station2 RPMS]#
  安装netdump-server时会在系统中添加netdump用户,供netdump传输数据时使用,但不会为该用户设置密码,因此该用户默认处于锁定状态。所以请记得给netdump用户设置密码!
  配置client
  [root@station1~]# rpm -q netdump
  netdump-0.7.16-14
  [root@station1~]# vi /etc/sysconfig/netdump
  指定netdump-server的IP地址
  NETDUMPADDR=192.168.0.2
  [root@station1~]# service netdump start
  netdump@192.168.0.2's password:
  initializing netdump [ OK ]
  initializing netconsole [ OK ]
  Message from syslogd@station1 at Mon Sep 22 17:47:38 2008 ...
  station1 kernel: [...network console startup...]
  [root@station1~]#
  然后在client上模拟crash
  [root@station1~]# echo 1 >/proc/sys/kernel/sysrq
  [root@station1~]# echo c >/proc/sysrq-trigger
  然后netdump就给激活了。
  当dump完数据,机器自动重启。
  在server上检验数据
  [root@station2~]# cd /var/crash/
  [root@station2 crash]# ll
  total 16
  drwx------ 2 netdump netdump 4096 Sep 22 18:05 192.168.0.1-2008-09-22-18:04
  drwx------ 2 netdump netdump 4096 Sep 22 17:48 magic
  drwxr-xr-x 2 netdump netdump 4096 Aug 4 2007 scripts
  [root@station2 crash]# cd 192.168.0.1-2008-09-22-18\:04/
  [root@station2 192.168.0.1-2008-09-22-18:04]# ll
  total 81656
  -rw------- 1 netdump netdump 30592 Sep 22 18:05 log
  -rw------- 1 netdump netdump 268439552 Sep 22 18:05 vmcore
  [root@station2 192.168.0.1-2008-09-22-18:04]# ll -h
  total 80M
  -rw------- 1 netdump netdump 30K Sep 22 18:05 log
  -rw------- 1 netdump netdump 257M Sep 22 18:05 vmcore
  [root@station2 192.168.0.1-2008-09-22-18:04]#
  vmcore就是发生crash的时候内存里的数据。
  diskdump
  diskdump在Red Hat Enterprise Linux 4 update 4之前需要一块单独的硬盘。
  在Red Hat Enterprise Linux 4 update 4以及之后,只需要一块单独的磁盘分区。
  试验环境:
  Red Hat Enterprise Linux 4 update 7
  原本采用Red Hat Enterprise Linux 5 update 2,但是发现diskdump在RHEL5以及更高版本上被kdump所替代。
  试验步骤
  先划分一块分区给diskdump。
  [root@dhcp-0-084~]# fdisk /dev/sdb
  Device contains neither a valid DOS partition table, nor Sun, SGI or OSF disklabel
  Building a new DOS disklabel. Changes will remain in memory only,
  until you decide to write them. After that, of course, the previous
  content won't be recoverable.
  Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
  Command (m for help): p
  Disk /dev/sdb: 4294 MB, 4294967296 bytes
  255 heads, 63 sectors/track, 522 cylinders
  Units = cylinders of 16065 * 512 = 8225280 bytes
  Device Boot Start End Blocks Id System
  Command (m for help): n
  Command action
  e extended
  p primary partition (1-4)
  p
  Partition number (1-4): 1
  First cylinder (1-522, default 1):
  Using default value 1
  Last cylinder or +size or +sizeM or +sizeK (1-522, default 522):
  Using default value 522
  Command (m for help): w
  The partition table has been altered!
  Calling ioctl() to re-read partition table.
  Syncing disks.
  [root@dhcp-0-084~]# partprobe
  Warning: Unable to open /dev/hdc read-write (Read-only file system). /dev/hdc has been opened read-only.
  No changes made to disk, exiting partprobe.
  [root@dhcp-0-084~]#
  我们用/dev/sdb1作为diskdump的数据临时存放点。
  编辑diskdump配置文件。
  [root@dhcp-0-084 RPMS]# rpm -qa | grep diskdump
  diskdumputils-1.4.1-5
  [root@dhcp-0-084 RPMS]# vi /etc/sysconfig/diskdump
  DEVICE=/dev/sdb1
  初始化并启动diskdump服务。
  [root@dhcp-0-084~]# service diskdump initialformat
  Formatting dump device:
  Do you want to format /dev/sdb1 (yes/NO)? yes
  /dev/sdb1:[100.0%]
  [root@dhcp-0-084~]# service diskdump restart
  Starting diskdump: [ OK ]
  [root@dhcp-0-084~]# chkconfig --level 35 diskdump on
  [root@dhcp-0-084~]#
  然后模拟死机。
  [root@dhcp-0-084 RPMS]# echo 1 >/proc/sys/kernel/sysrq
  [root@dhcp-0-084 RPMS]# echo c >/proc/sysrq-trigger
  此时diskdump开始工作。把内存中的数据dump到/dev/sdb1中。
  diskdump之后必须手动重启机器。
  重启机器后,diskdump会把/dev/sdb1里面的数据拷到/var/crash文件夹下。
  检查diskdump数据。
  [root@dhcp-0-084~]# cd /var/crash/
  [root@dhcp-0-084 crash]# ll
  total 12
  drwx------ 2 root root 4096 Sep 22 19:52 127.0.0.1-2008-09-22-19:49
  drwxr-xr-x 2 netdump netdump 4096 Mar 25 23:33 scripts
  [root@dhcp-0-084 crash]# cd 127.0.0.1-2008-09-22-19\:49/
  [root@dhcp-0-084 127.0.0.1-2008-09-22-19:49]# ll -h
  total 86M
  -rw------- 1 root root 513M Sep 22 19:52 vmcore
  [root@dhcp-0-084 127.0.0.1-2008-09-22-19:49]#
  kdump
  实验环境:
  client OS: Red Hat Enterprise Linux 5 update 2
  client IP: 10.66.0.157
  server OS: Red Hat Enterprise Linux 4 update 7
  server IP: 10.66.0.84
  kdump属于kexec-tools-1.102pre-21.el5这个包。一般装系统都有装上。
  kdump支持N种方式存储内存数据,包括裸设备,文件系统,nfs,ssh。并且能设定dump前和dump后运行脚本以及执行其他动作。正所谓很好很强大。
  kdump的dump机制是:预先生成一个crashkernel,在内核crash的时候,激活这个crashkernel,用这个crashkernel载入的小型系统dump处于crash状态的内核。
  实验步骤:
  这次我会配置kdump把内存数据scp到备份服务器上。
  在client上:
  安装kdump
  [root@dhcp-0-157~]# cat /etc/redhat-release
  Red Hat Enterprise Linux Server release 5.2 (Tikanga)
  [root@dhcp-0-157~]# cd /misc/cd/Server
  [root@dhcp-0-157 Server]# rpm -ivh busybox-1.2.0-4.el5.i386.rpm
  warning: busybox-1.2.0-4.el5.i386.rpm: Header V3 DSA signature: NOKEY, key ID 37017186
  Preparing... ########################################### [100%]
  1:busybox ########################################### [100%]
  [root@dhcp-0-157 Server]# rpm -ivh kexec-tools-1.102pre-21.el5.i386.rpm
  warning: kexec-tools-1.102pre-21.el5.i386.rpm: Header V3 DSA signature: NOKEY, key ID 37017186
  Preparing... ########################################### [100%]
  1:kexec-tools ########################################### [100%]
  [root@dhcp-0-157 Server]#
  配置kdump
  [root@dhcp-0-157~]# vim /etc/kdump.conf
  net root@10.66.0.84
  对kernel开启kdump支持
  [root@dhcp-0-157~]# vim /boot/grub/grub.conf
  在kernel选项上添加crashkernel参数。
  参数格式是:
  crashkernel=nn[KMG]@ss[KMG]
  nn表示要为crashkernel预留多少内存
  ss表示为crashkernel预留内存的起始位置
  default=0
  timeout=5
  splashimage=(hd0,0)/grub/splash.xpm.gz
  hiddenmenu
  title Red Hat Enterprise Linux Server (2.6.18-92.el5)
  root (hd0,0)
  kernel /vmlinuz-2.6.18-92.el5 ro root=LABEL=/ crashkernel=128M@16M
  initrd /initrd-2.6.18-92.el5.img
  重启电脑使新参数生效。
  然后启用kdump服务
  [root@dhcp-0-157~]# service kdump
  Usage: /etc/init.d/kdump {start|stop|status|restart|propagate}
  [root@dhcp-0-157~]# service kdump propagate
  Generating new ssh keys... done.
  The authenticity of host '10.66.0.84 (10.66.0.84)' can't be established.
  RSA key fingerprint is 31:c2:d8:b6:eb:2e:03:64:cd:ba:56:e9:49:6e:5d:6c.
  Are you sure you want to continue connecting (yes/no)? yes
  Warning: Permanently added '10.66.0.84' (RSA) to the list of known hosts.
  root@10.66.0.84's password:
  /root/.ssh/kdump_id_rsa.pub has been added to ~root/.ssh/authorized_keys2 on 10.66.0.84
  [root@dhcp-0-157~]# service kdump restart
  Stopping kdump: [ OK ]
  No kdump initial ramdisk found. [WARNING]
  Rebuilding /boot/initrd-2.6.18-92.el5kdump.img
  Starting kdump: [ OK ]
  [root@dhcp-0-157~]# chkconfig --level 35 kdump on
  [root@dhcp-0-157~]#
  最后模拟crash:
  [root@dhcp-0-157 ~]# echo 1 >/proc/sys/kernel/sysrq
  [root@dhcp-0-157 ~]# echo c >/proc/sysrq-trigger
  在server的/var/crash下可以看到由client转储过来的内核数据。
  另外,kdump在完成内核转储后会自动重启。
 
阅读(1242) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~