[root@server1 nagios]# 1 安装依赖库: yum install gd-devel -y
[root@server1 nagios]# 2 部署lamp环境: yum install httpd mysql mysql-server php php-mysql -y
[root@server1 nagios]# 3 安装主程序nagios
[root@server1 nagios]# tar fvxz nagios.tar.gz
[root@server1 nagios]# ./configure --prefix=/usr/local/nagios
[root@server1 nagios]# useradd nagios
[root@server1 nagios]# make all
[root@server1 nagios]# make install
[root@server1 nagios]# make install && make install-init && make install-commandmode && make install-config && make install-webconf
make install
- This installs the main program, CGIs, and HTML files
make install-init
- This installs the init script in /etc/rc.d/init.d
make install-commandmode
- This installs and configures permissions on the
directory for holding the external command file
make install-config
- This installs *SAMPLE* config files in /usr/local/nagios/etc
You'll have to modify these sample files before you can
use Nagios. Read the HTML documentation for more info
on doing this. Pay particular attention to the docs on
object configuration files, as they determine what/how
things get monitored!
make install-webconf
- This installs the Apache config file for the Nagios
web interface
重新启动apache
service httpd restart
生成用户
[root@server1 nagios-3.2.0]# htpasswd -c /usr/local/nagios/etc/htpasswd.users nagios
New password:
Re-type new password:
Adding password for user nagios
给nagios用户开权限,让他能够查看信息!
[root@server1 nagios-3.2.0]# vim /usr/local/nagios/etc/cgi.cfg
在所有的nagiosadmin后面添加 ,nagios (用逗号分隔,例如 authorized_for_system_information=nagiosadmin,nagios)
本机为什么是down的状态???
监控分析控制台 ---------------主程序
插件
--------------被监控主机
[root@server1 libexec]# pwd
/usr/local/nagios/libexec
[root@server1 libexec]# ls
[root@server1 libexec]#
插件目录下什么都没有啊!
安装插件
[root@server1 nagios-plugins-1.4.13]# ./configure --prefix=/usr/local/nagios/ \
--with-gnutls --with-openssl --enable-extra-opts --enable-perl-modules
make && make install
怎样监控更多的内容?
[root@server1 etc]# pwd
/usr/local/nagios/etc
[root@server1 etc]# vim nagios.cfg
编辑主配置文件
cfg_file=/usr/local/nagios/etc/objects/commands.cfg
cfg_file=/usr/local/nagios/etc/objects/contacts.cfg
cfg_file=/usr/local/nagios/etc/objects/timeperiods.cfg
cfg_file=/usr/local/nagios/etc/objects/templates.cfg
通过上面的语句来调用那些配置文件
[root@server1 objects]# pwd
/usr/local/nagios/etc/objects
时间timeperiods.cfg
define timeperiod{
timeperiod_name 24x7
alias 24 Hours A Day, 7 Days A Week
sunday 00:00-24:00
monday 00:00-24:00
tuesday 00:00-24:00
wednesday 00:00-24:00
thursday 00:00-24:00
friday 00:00-24:00
saturday 00:00-24:00
}
插件commands.cfg
define command{
command_name check-host-alive
command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5
}
监控谁localhost.cfg
define host {
host_name xxxx
alias test
address 192.168.18.50
check_command check-host-alive
notification_options d,u,r
check_interval 1
max_check_attempts 2
contact_groups admins
notification_interval 10
notification_period 24x7
}
联系人contacts.cfg
define contact {
contact_name kyo
alias kyo
host_notification_period 24x7
host_notification_options d,u,r
service_notification_period 24x7
service_notification_options w,u,c,r
service_notification_commands notify-service-by-email
host_notification_commands notify-host-by-email
email root@163.com
#通过飞信机器人发信报警!
}
define contactgroup{
contactgroup_name admins
alias Nagios Administrators
members nagiosadmin,kyo
}
检查错误
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
定义服务
define service {
host_name xxxx
service_description apache
check_period 24x7
normal_check_interval 2
retry_check_interval 1
max_check_attempts 5
notification_period 24x7
notification_options w,u,c,r
check_command check_http
}
关于插件的返回状态
[root@server1 objects]# echo $?
2
[root@server1 objects]# /usr/local/nagios/libexec/check_http -I 192.168.18.50
HTTP OK HTTP/1.1 200 OK - 43306 bytes in 0.026 seconds |time=0.026288s;;;0.000000 size=43306B;;;0
[root@server1 objects]# echo $?
0
[root@server1 objects]# /usr/local/nagios/libexec/check_http -I 192.168.18.50 -u /a.html -s hello
HTTP WARNING: HTTP/1.1 404 Not Found
[root@server1 objects]# echo $?
1
插件返回值: 0 正常(OK), 1 警告(WARNING), 2 严重错误(CRITICAL), 3 未知(UNKNOWN)
自己编写插件!!!!!!!!!!!!!!!!
自定义命令
define command {
command_name check_url
command_line $USER1$/check_http -I $HOSTADDRESS$ -u $ARG1$ -s $ARG2$
}
使用新定义的命令
define service {
host_name xxxx
service_description apache
check_period 24x7
normal_check_interval 2
retry_check_interval 1
max_check_attempts 5
notification_period 24x7
notification_options w,u,c,r
# check_command check_http
check_command check_url!/index.html!hello
}
如果遇到host条目一会有,一会消失的问题,可以killall nagios 再重新启动nagios!
监控远程主机的系统信息
被监控主机
安装nrpe的server端
tar fvxz nrpe*.tar.gz
./configure --prefix=/usr/local/nagios
useradd nagios
make
make install-daemon
make install-daemon-config
make install-xinetd
安装插件
make install (这步不是必须的!)
把插件拷贝给监控主机nagios
scp /usr/local/nagios/libexec/check_nrpe root@监控主机的ip:/usr/local/nagios/libexec/
在被监控主机开启nrpe服务
vim /etc/xinetd.d/nrpe
# default: on
# description: NRPE (Nagios Remote Plugin Executor)
service nrpe
{
flags = REUSE
socket_type = stream
port = 5666
wait = no
user = nagios
group = nagios
server = /usr/local/nagios/bin/nrpe
server_args = -c /usr/local/nagios/etc/nrpe.cfg --inetd
log_on_failure += USERID
disable = no
# 监控主机的ip,保证它可以连接进来! (xinetd只识别行首的#注释,所以注释必须单独成行)
only_from = 192.168.18.254
}
vim /etc/services
nrpe 5666/tcp
service xinetd restart
[root@server1 objects]# /usr/local/nagios/libexec/check_nrpe -H 192.168.18.188
NRPE v2.12
#注意关闭防火墙!
在被监控主机安装插件
vim nrpe.cfg
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_u]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/hda1
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
通过以上字段来定义命令,以及接收命令后执行的插件
如果想不明白,整个流程如下:
监控主机的nagios -----通过 libexec/check_nrpe -c 命令名 把命令名发送给被监控主机
被监控主机接收到命令以后去查找nrpe.cfg中command字段,再去执行对应的本地插件,返回结果给监控主机的nagios
定义服务,来检测一下
define host {
host_name zcg
alias nrpe-server
address 192.168.18.188
check_command check-host-alive
notification_options d,u,r
check_interval 1
max_check_attempts 2
contact_groups admins
notification_interval 10
notification_period 24x7
}
define service {
host_name zcg
service_description nrpe
check_period 24x7
normal_check_interval 2
retry_check_interval 1
max_check_attempts 5
notification_period 24x7
notification_options w,u,c,r
check_command check_nrpe!check_users
#这里使用的check_nrpe需要在commands.cfg里面定义
}
别忘了,先定义好zcg这台主机!!
定义命令
define command {
command_name check_nrpe
command_line /usr/local/nagios/libexec/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
重启nagios服务!
1 利用飞信的机器人发信
2 实现自动添加nagios监控主机
3 自行编写nagios插件