Chinaunix首页 | 论坛 | 博客
  • 博客访问: 623790
  • 博文数量: 73
  • 博客积分: 1813
  • 博客等级: 上尉
  • 技术积分: 1213
  • 用 户 组: 普通用户
  • 注册时间: 2009-12-27 19:59
文章分类
文章存档

2013年(1)

2012年(12)

2011年(28)

2010年(31)

2009年(1)

我的朋友

分类: LINUX

2010-07-22 18:32:10

.nagios配置

 

1.在服务器端安装nrpe(此处使用2.12版本)

 

#tar zxvf nrpe-2.12.tar.gz

#cd nrpe-2.12

#./configure     (因为之前安装了nagios-plugins,所以nrpe默认安装在/usr/local/nagios/下,也就是与nagios-plugins在同一个安装目录下)

#make all

#make install-plugin

#make install-daemon

#make install-daemon-config

 

# ls /usr/local/nagios/libexec/check_nrpe

/usr/local/nagios/libexec/check_nrpe    

此文件出现,表明安装成功

 

# ll /usr/local/nagios/

total 24

drwxrwxr-x  2 nagios nagios 4096 Jul 21 19:09 bin

drwxrwxr-x  3 nagios nagios 4096 Jul 22 13:35 etc

drwxrwxr-x  2 nagios nagios 4096 Jul 21 19:09 libexec

drwxrwxr-x  2 nagios nagios 4096 Jul 21 18:57 sbin

drwxrwxr-x 10 nagios nagios 4096 Jul 21 19:03 share

drwxrwxr-x  5 nagios nagios 4096 Jul 22 14:25 var

 

注意此时,在nagios目录下的所有文件与子目录的所有者与所属组都为nagios,只有一个例外:/usr/local/nagios/etc/htpasswd.users 的所有者与所属组为 root root。以后再添加的文件也同样应为nagios nagios,这里如果出现差错,后面可能会出权限问题。

 

2.配置nagios主配置文件nagios.cfg

#  cat nagios.cfg  只列出改动的部分,下同

 

cfg_file=/usr/local/nagios/etc/objects/commands.cfg

cfg_file=/usr/local/nagios/etc/objects/contacts.cfg

cfg_file=/usr/local/nagios/etc/objects/timeperiods.cfg

cfg_file=/usr/local/nagios/etc/objects/templates.cfg

 

新添加下面4句,指向子文件所在位置

cfg_file=/usr/local/nagios/etc/hosts.cfg

cfg_file=/usr/local/nagios/etc/hostgroups.cfg

cfg_file=/usr/local/nagios/etc/contactgroups.cfg

cfg_file=/usr/local/nagios/etc/services.cfg

 

 

# Definitions for monitoring the local (Linux) host

#cfg_file=/usr/local/nagios/etc/objects/localhost.cfg  #注释掉,因为有了hosts.cfg文件

 

command_check_interval=10s

#command_check_interval=-1  #原来为-1,改成10s

 

 

3.根据上一步新添加的4句,创建文件hosts.cfg hostgroups.cfg contactgroups.cfg services.cfg

 

4.配置hosts.cfg    hostgroups.cfg   contactgroups.cfg

 

# cat hosts.cfg

 

define host {

host_name               nagios-server    #与hostgroups.cfg中定义的保持一致

alias                   nagios server

address                 192.168.0.13     #被监控主机IP

contact_groups          sagroup          #监控用户所在的组名,在contactgroups.cfg定义

check_command           check-host-alive  #此为一个命令,在objects/commands.cfg中有定义,必须有定义

max_check_attempts      5                 #检测次数,一般为3~5

notification_interval   10               #重复发送通知的时间间隔,单位为分钟,根据自己情况定

notification_period     24x7              #代表全天候(7x24小时)都可以发送通知,不能为*,只能为x,下同

notification_options    d,u,r           #此为状态描述,d-down,u-unreachable,r-recovery

}

 

----------------------------------------------------

# cat hostgroups.cfg 定义组与组成员

 

define hostgroup {

hostgroup_name  sa-servers

alias           sa servers

members         nagios-server     #(如果有多个主机,可以以“,”分隔,不能有空格)

}

 

----------------------------------------------------

 

# cat contactgroups.cfg

 

define contactgroup {

contactgroup_name       sagroup

alias                   system administrator group

members                 nagiosadmin

}

 

--------------------

 

5.配置cgi.cfg

 

# cat cgi.cfg

use_authentication=0    #改成0表示不对用户进行cgi验证

 

authorized_for_system_information=nagiosadmin    #因为当时创建的管理用户就是nagiosadmin,所以此处不用修改;如果创建的用户为其他名称,则要修改;如果创建多个用户,可以用“,”分隔。

authorized_for_configuration_information=nagiosadmin

authorized_for_system_commands=nagiosadmin   #  * 此处即使是其他用户,也不能改动。*

authorized_for_all_services=nagiosadmin

authorized_for_all_hosts=nagiosadmin

authorized_for_all_service_commands=nagiosadmin

authorized_for_all_host_commands=nagiosadmin

 

 

6.配置nrpe.cfg

 

# cat nrpe.cfg | sed -n '/^[^#]/p'

 

log_facility=daemon

pid_file=/var/run/nrpe.pid

server_port=5666      #端口号,可以改动

nrpe_user=nagios

nrpe_group=nagios

allowed_hosts=127.0.0.1,192.168.0.13   #此处是可以连接管理此主机的服务器,也就是监控服务器的IP

 

dont_blame_nrpe=0

debug=0

command_timeout=60

connection_timeout=300

#下面是定义的命令

command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10    #登录用户数,超过5个为Warning(警告),超过10个为Critical(严重)

command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20  #负载情况,三个数分别表示1分钟、5分钟、15分钟内的平均负载

command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z  #僵尸进程数

command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200  #进程总数

command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%  #交换分区剩余空间比例(剩余低于20%为Warning,低于10%为Critical)

command[check_disk]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda3  #磁盘分区剩余空间比例(剩余低于20%为Warning,低于10%为Critical)

 

 

 

还可以自己定义,通过写脚本来完成,后面再来补充。

 

7.配置objects/contacts.cfg

 

# cat objects/contacts.cfg

 

define contact{

contact_name                    nagiosadmin

alias                           system administrator

service_notification_period     24x7

host_notification_period        24x7

service_notification_options    w,u,c,r                  #代表Warning,Unknown,Critical,recovery

host_notification_options       d,u,r

service_notification_commands   notify-service-by-fetion,notify-service-by-sms   #指明报警方式

host_notification_commands      notify-host-by-fetion,notify-host-by-sms         #同上

email                          **********@139.com

pager                           152******13

}

 

 

8.配置 objects/commands.cfg

 

# cat objects/commands.cfg  (一定要定义的列出,其他的不必要变动)

 

# 'check-host-alive' define command

 

define command{

        command_name    check-host-alive

        command_line    $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5

        }

# 'check_nrpe' define command  这个是要自己定义的,很重要,会影响到services.cfg中的配置

 

define command{

       command_name check_nrpe

       command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$    # $ARG1$表示check_nrpe后面的命令,如:check_disk

       }

 

 

# 'notify-host-by-fetion' command definition   飞信报警配置

 

define command{

        command_name    notify-host-by-fetion

        command_line    /usr/local/fetion/fetion --mobile=152******** --pwd=******** --to $CONTACTPAGER$ --msg-utf8="$HOSTNAME$ is  $HOSTSTATE$" --debug

}

 

# 'notify-service-by-fetion' command definition

define command{

        command_name    notify-service-by-fetion

        command_line    /usr/local/fetion/fetion --mobile=152******** --pwd=******** --to $CONTACTPAGER$ --msg-utf8="$NOTIFICATIONTYPE$: $HOSTALIAS$/$SERVICEDESC$ IS $SERVICESTATE$" --debug

        }

 

 

# 'notify-host-by-sms' command definition      邮件报警配置

 

define command {

       command_name notify-host-by-sms

       command_line  /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /usr/local/sendEmail/sendEmail -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$

        }

 

# 'notify-service-by-sms' command definition

 

define command {

       command_name notify-service-by-sms

       command_line  /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$" | /usr/local/sendEmail/sendEmail -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$

       }

 

9.配置services.cfg

 

#cat services.cfg

 

###nagios-server:services.cfg###

 

define service {

host_name               nagios-server     #主机名一定要与hosts.cfg文件中的定义保持一致

service_description     check-host-alive

check_period            24x7

max_check_attempts      4

normal_check_interval   3

retry_check_interval    2

contact_groups          sagroup

notification_interval   10

notification_period     24x7

notification_options    w,u,c,r

check_command           check-host-alive  #命令为objects/commands.cfg中已经定义的

}

 

 

define service {

host_name               nagios-server

service_description     check_tcp 80

check_period            24x7

max_check_attempts      4

normal_check_interval   3

retry_check_interval    2

contact_groups          sagroup

notification_interval   10

notification_period     24x7

notification_options    w,u,c,r

check_command           check_tcp!80   #感叹号后面为参数

}

 

 

 

define service {

host_name               nagios-server

service_description     check_local_disk

check_period            24x7

max_check_attempts      4

normal_check_interval   3

retry_check_interval    2

contact_groups          sagroup

notification_interval   10

notification_period     24x7

notification_options    w,u,c,r

#check_command          check_local_disk!20%!10%!/

check_command           check_nrpe!check_disk

}

 

 

 

define service {

host_name               nagios-server

service_description     check_load

check_period            24x7

max_check_attempts      4

normal_check_interval   3

retry_check_interval    2

contact_groups          sagroup

notification_interval   10

notification_period     24x7

notification_options    w,u,c,r

check_command           check_nrpe!check_load

}

 

define service {

host_name               nagios-server

service_description     check_total_procs

check_period            24x7

max_check_attempts      4

normal_check_interval   3

retry_check_interval    2

contact_groups          sagroup

notification_interval   10

notification_period     24x7

notification_options    w,u,c,r

check_command           check_nrpe!check_total_procs

}

 

define service {

host_name               nagios-server

service_description     check_users

check_period            24x7

max_check_attempts      4

normal_check_interval   3

retry_check_interval    2

contact_groups          sagroup

notification_interval   10

notification_period     24x7

notification_options    w,u,c,r

check_command           check_nrpe!check_users

}

 

 

此处定义监控6个服务,如果要监控其他主机的服务,也要在这里定义,下面会提到。

 

 

 

10.此时配置完成了一大步,以后再配置也是在这个基础上,会很容易了。

下面就要启动nrpe,重启nagios来检测配置是否成功!

 

#/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg

Checking for circular paths between hosts...

Checking for circular host and service dependencies...

Checking global event handlers...

Checking obsessive compulsive processor commands...

Checking misc settings...

 

Total Warnings: 0

Total Errors:   0

 

出现以上信息,表明配置文件没有错误,可以启动nagios

 

#service nagios restart  启动成功

 

 

# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

# tail -f /var/log/messages

Jul 22 16:25:16 localhost nrpe[14911]: Starting up daemon

Jul 22 16:25:16 localhost nrpe[14911]: Listening for connections on port 5666

Jul 22 16:25:16 localhost nrpe[14911]: Allowing connections from: 127.0.0.1,192.168.0.13

日志信息出现如上,表明启动成功,测试一下

 

 

# /usr/local/nagios/libexec/check_nrpe -H 192.168.0.13

NRPE v2.12                     会显示nrpe版本号

 

# /usr/local/nagios/libexec/check_nrpe -H 192.168.0.13 -c check_disk

DISK OK - free space: / 242377 MB (87% inode=99%);| /=34099MB;233219;262371;0;291524

 

能出现这些信息表明成功!
 
 
还没有结束,请转 Nagios被监控端安装配置 连载3
 
nagios-手机飞信与sendEmail配置
阅读(2793) | 评论(0) | 转发(1) |
给主人留下些什么吧!~~