Chinaunix首页 | 论坛 | 博客
  • 博客访问: 145241
  • 博文数量: 68
  • 博客积分: 10
  • 博客等级: 民兵
  • 技术积分: 720
  • 用 户 组: 普通用户
  • 注册时间: 2011-02-28 20:01
文章分类

全部博文(68)

文章存档

2015年(68)

我的朋友

分类: 系统运维

2015-08-31 17:19:51

nagios的事件处理(Event Handlers)可以在监控的主机或服务的状态发生变化时触发脚本或系统命令来对故障进行处理,事件处理在告警发出前发生。我下面记录的是通过Nagios检测远程机器上的tomcat服务,当远程机器上的tomcat服务挂死或当掉时通过Nagios的事件处理来重启远程机器上的tomcat。

 

被监控端配置:(安装的客户端为nrpe)

(1)创建事件处理脚本

[root@ ~]# mkdir -p  /usr/lib64/nagios/plugins/eventhandlers 

[root@ ~]# cd /usr/lib64/nagios/plugins/eventhandlers 

[root@ ~]# vi /usr/lib64/nagios/plugins/eventhandlers/restart_tomcat.sh

restart_tomcat.sh内容如下:

#!/bin/sh
#
# restart_tomcat.sh - restart the Tomcat service.
#
# Runs Tomcat's shutdown.sh, waits 3 seconds, then runs startup.sh.
# Both scripts are executed through `su -` as $RUN_AS_USER.

# Account the Tomcat scripts are run as.
RUN_AS_USER=appuser

# Tomcat installation directory; bin/shutdown.sh and bin/startup.sh are
# looked up underneath it.
CATALINA_HOME=/usr/local/tomcat

# Deliberately no `set -e`: shutdown.sh is attempted even if Tomcat is not
# running any more, and a failure there must not prevent the start below.
su - "$RUN_AS_USER" -c "$CATALINA_HOME/bin/shutdown.sh"

# Pause between shutdown and startup.
sleep 3

su - "$RUN_AS_USER" -c "$CATALINA_HOME/bin/startup.sh"

[root@ ~]# vi /usr/lib64/nagios/plugins/eventhandlers/myhandler.sh

myhandler.sh内容如下:

#!/bin/sh
#
# myhandler.sh - event handler that restarts Tomcat.
#
# Arguments:
#   $1 - service state       (OK, WARNING, UNKNOWN, CRITICAL)
#   $2 - service state type  (SOFT, HARD)
#   $3 - current check attempt number
#
# Tomcat is restarted on the 3rd SOFT CRITICAL attempt and whenever the
# handler is called with a HARD CRITICAL state. Every other combination is
# a no-op. The script always exits 0.

# Restart script to run through sudo. This has to be the same path that the
# sudoers NOPASSWD rule names, otherwise sudo will not run it unattended.
RESTART_SCRIPT=/usr/lib64/nagios/plugins/eventhandlers/restart_tomcat.sh

# File that every restart attempt is appended to.
LOG_FILE=/tmp/eventhandlers

# Timestamp taken once at startup and used for the log entry.
now=$(date)

# Announce, log and perform a restart.
#   $1 - message printed on stdout
#   $2 - state type tag written to the log (SOFT or HARD)
restart_tomcat() {
        # printf instead of `echo -n "...\n"`: echo's handling of -n and
        # backslash escapes differs between /bin/sh implementations.
        printf '%s\n' "$1"
        echo "$now - restart BLAH - $2" >> "$LOG_FILE"
        /usr/bin/sudo "$RESTART_SCRIPT"
}

case "$1" in
CRITICAL)
        case "$2" in
        SOFT)
                # Only act on the 3rd soft attempt; earlier and later soft
                # attempts are ignored.
                if [ "$3" = 3 ]; then
                        restart_tomcat "Restarting Tomcat service (3rd soft critical state)..." SOFT
                fi
                ;;
        HARD)
                # No attempt-number filter here: the attempt at which the
                # HARD state is reached depends on the service's
                # max_check_attempts, so matching one fixed number can make
                # this branch unreachable.
                restart_tomcat "Restarting Tomcat service..." HARD
                ;;
        esac
        ;;
*)
        # OK, WARNING, UNKNOWN (and anything unexpected): nothing to do.
        ;;
esac

exit 0

[root@ ~]# chmod +x  /usr/lib64/nagios/plugins/eventhandlers/*
(2)更改nrpe配置

[root@ ~]# vi /usr/local/nagios/etc/nrpe.cfg

添加如下内容:

# 定义监控命令(路径要与第(1)步中创建脚本的实际位置一致)
command[restart_tomcat]=/usr/lib64/nagios/plugins/eventhandlers/myhandler.sh '$ARG1$' '$ARG2$' '$ARG3$' '$ARG4$'

# 允许命令中带有参数(nrpe编译时需要带 --enable-command-args 选项才会生效)
dont_blame_nrpe=1

(3)给nagios执行脚本的权限

[root@ ~]# visudo

nagios ALL=(root) NOPASSWD:/usr/lib64/nagios/plugins/eventhandlers/restart_tomcat.sh


服务端配置:

(1)打开全局事件处理

[root@ ~]# vi /usr/local/nagios/etc/nagios.cfg

enable_event_handlers=1

(2)定义命令

[root@ ~]# vi /usr/local/nagios/etc/objects/commands.cfg

# Event handler command: calls check_nrpe against the host's address and asks
# it to run the remote `restart_tomcat` command, passing the service state,
# the state type and the current check attempt as arguments.
# command_line is kept on a single line so it does not depend on backslash
# line continuation.
define command{
        command_name    restart_tomcat
        command_line    /usr/local/nagios/libexec/check_nrpe -H $HOSTADDRESS$ -c restart_tomcat -a $SERVICESTATE$ $SERVICESTATETYPE$ $SERVICEATTEMPT$
        }

(3)配置服务使用事件处理

# Tomcat service on host301: a TCP check against port 8083, with the
# restart_tomcat command attached as its event handler.
# NOTE(review): with max_check_attempts 5 the HARD state should be reached on
# attempt 5 - confirm that any attempt numbers the handler script matches on
# are consistent with this value.
define service {
        use                     generic-service
        host_name               host301
        service_description     S_Tomcat[8083]
        check_command           check_tcp!8083
        event_handler           restart_tomcat
        max_check_attempts      5
        check_interval          5
        retry_interval          1
        contact_groups          sys_admins
}


测试:

可在服务端输入如下命令测试

[root@ ~]# /usr/local/nagios/libexec/check_nrpe -H <被监控端IP> -c restart_tomcat -a CRITICAL HARD 4

(把 <被监控端IP> 换成被监控机器的实际地址;$HOSTADDRESS$ 是Nagios的宏,只在Nagios配置中有效,不能直接在shell里使用)

然后在被监控端关掉tomcat看看能不能自动重启

阅读(196) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~