博客是我工作的好帮手,遇到困难就来博客找资料
分类: 系统运维
2017-02-20 16:04:59
Nagios是一款企业级开源软件,专注于监控服务器上服务是否正常,不生成图形,提供报警机制,邮件或者短信发送监控状态,它通过各种插件实现不同的功能。
Nagios 监控平台主程序
Nagios-plugins 必选插件
NRPE 监控远程服务器的主机资源
NSClient++ 用于监控Windows主机
NDOUtils 将数据写入数据库
一、安装RHEL7.2
最小化安装,配置IP,时间同步,本地yum源,安装vim(个人习惯)、bash-completion(命令补齐)
# hostnamectl set-hostname nagios_cacti
# yum install vim
# yum install bash-completion
# yum install chrony
# systemctl enable chronyd
# systemctl start chronyd
# vim /etc/chrony.conf
server 10.100.2.5 iburst //增加一行时间源
# yum install ntpdate
# ntpdate 10.100.2.5 //手动同步时间
配置CentOS 163 yum源
# yum install wget
# wget
# wget
# wget
# wget
# rpm -qa|grep yum //检查redhat是否安装了yum,及有哪些Yum包
# rpm -qa|grep yum|xargs rpm -e --nodeps //删除redhat自带的yum包
# rpm -ivh yum-3.4.3-132.el7.centos.0.1.noarch.rpm yum-metadata-parser-1.1.4-10.el7.x86_64.rpm yum-plugin-fastestmirror-1.1.31-34.el7.noarch.rpm
# mv CentOS7-Base-163.repo /etc/yum.repos.d/
# vim /etc/yum.repos.d/CentOS7-Base-163.repo //通过":1,$s/$releasever/7/gc"和":1,$s/$basearch/x86_64/gc"查找和替换文件内容
# yum clean all //清除yum缓存
# yum makecache //重建缓存,以提高搜索软件包速度
# yum update //更新系统(省略)
实例应用:
1 监控快速部署
监控需要安装http php nagios nagios-plugins NRPE软件包
yum install -y gd gd-devel openssl openssl-devel httpd php gcc glibc glibc-common make wget net-snmp
setenforce 0
iptables -F
安装nagios 源码包下载安装
wget
groupadd nagios
useradd -g nagios nagios
tar -zxf nagios-3.5.0.tar.gz -C /usr/src/
cd /usr/src/nagios
./configure --with-nagios-user=nagios --with-nagios-group=nagios
make all
make install
make install-init #安装启动脚本
make install-commandmode #安装与配置目录权限
make install-config #安装配置文件模板
make install-webconf #web监控界面配置
安装nagios-plugins和nrpe
wget
tar -zxf nagios-plugins-1.4.16.tar.gz -C /usr/src/
cd /usr/src/nagios-plugins-1.4.16
./configure --prefix=/usr/local/nagios/
make && make install
wget wget
tar -zxf nrpe-2.14.tar.gz -C /usr/src/
cd /usr/src/nrpe-2.14
./configure
make all
make install-plugin
make install-daemon
make install-daemon-config
chown -R nagios.nagios /usr/local/nagios
创建账户信息
htpasswd -c /usr/local/nagios/etc/htpasswd.users tomcat
iptables -I INPUT -p tcp --dport 80 -j ACCEPT
service iptables save
启动服务
service httpd start
/etc/init.d/nagios start
chkconfig httpd on
chkconfig --add nagios
chkconfig nagios on
2 修改配置文件
nagios的配置文件较多,主要位于/usr/local/nagios/etc 下
nagios.cfg 主配置文件
nrpe.cfg 远程监控配置文件
cgi.cfg CGI配置文件
commands.cfg 命令定义文件
contacts.cfg 定义联系人文件
timeperiods.cfg 时间周期定义文件
templates.cfg 对象定义参考模板
localhost.cfg 监控本机配置模板
printer.cfg 监控打印机模板
switch.cfg 监控交换模板
windows.cfg 监控Windows配置模板
很多配置文件无需修改可以直接使用
修改主配置文件nagios.cfg,主要是用cfg_file配置加载其他配置文件。
vim /usr/local/nagios/etc/nagios.cfg
cfg_file=/usr/local/nagios/etc/objects/commands.cfg
cfg_file=/usr/local/nagios/etc/objects/contacts.cfg
cfg_file=/usr/local/nagios/etc/objects/templates.cfg
cfg_file=/usr/local/nagios/etc/objects/timeperiods.cfg
cfg_file=/usr/local/nagios/etc/objects/localhost.cfg
cfg_file=/usr/local/nagios/etc/web1.cfg
cfg_file=/usr/local/nagios/etc/web2.cfg
修改CGI配置文件cgi.cfg,添加tomcat账户进来
vim /usr/local/nagios/etc/cgi.cfg
default_user_name=tomcat
authorized_for_system_information=nagiosadmin,tomcat
authorized_for_configuration_information=nagiosadmin,tomcat
authorized_for_system_commands=nagiosadmin,tomcat
authorized_for_all_services=nagiosadmin,tomcat
authorized_for_all_hosts=nagiosadmin,tomcat
authorized_for_all_service_commands=nagiosadmin,tomcat
authorized_for_all_host_commands=nagiosadmin,tomcat
修改命令配置文件commands.cfg,定义命令实现的方式,如邮件报警,使用工具,内容格式等。
vim /usr/local/nagios/etc/objects/commands.cfg
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -t 30 -c $ARG1$
}
define command{
command_name check_nrpe_args
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -t 30 -c $ARG1$ -a $ARG2$
}
修改联系人配置文件contacts.cfg 报警的联系人及联系方式
define contact{
contact_name nagiosadmin
use generic-contact
alias Nagios Admin
email yourname@domain.com
}
修改报警时间周期timeperiods.cfg
vim /usr/local/nagios/etc/objects/timeperiods.cfg
define timeperiod{
timeperiod_name 24x7 ;监控所有时间段(7*24小时)
alias 24 Hours A Day, 7 Days A Week
sunday 00:00-24:00
monday 00:00-24:00
tuesday 00:00-24:00
wednesday 00:00-24:00
thursday 00:00-24:00
friday 00:00-24:00
saturday 00:00-24:00
}
修改本机的配置localhost.cfg
define host{
use linux-server
host_name duangr-1
alias duangr-1
address 192.168.56.10
}
define service{
use local-service
host_name duangr-1
service_description Host Alive
check_command check-host-alive
}
define service{
use local-service
host_name duangr-1
service_description Users
check_command check_local_users!20!50
}
define service{
use local-service
host_name duangr-1
service_description CPU
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
}
define service{
use local-service
host_name duangr-1
service_description Disk Root
check_command check_local_disk!20%!10%!/
}
define service{
use local-service
host_name duangr-1
service_description Disk Home
check_command check_local_disk!20%!10%!/export/home
}
define service{
use local-service
host_name duangr-1
service_description Zombie Procs
check_command check_local_procs!5!10!Z
}
define service{
use local-service
host_name duangr-1
service_description Total Procs
check_command check_local_procs!250!400!RSZDT
}
define service{
use local-service
host_name duangr-1
service_description Swap Usage
check_command check_local_swap!20!10
}
修改模板文件templates.cfg
vi /usr/local/nagios/etc/objects/templates.cfg
#联系人模板generic-contact
define contact{
name generic-contact
service_notification_period 24x7
host_notification_period 24x7
service_notification_options w,u,c,r,f,s
host_notification_options d,u,r,f,s
service_notification_commands notify-service-by-email
host_notification_commands notify-host-by-email
register 0
}
#定义generic-host主机模板
define host{
name generic-host
notifications_enabled 1
event_handler_enabled 1
flap_detection_enabled 1
failure_prediction_enabled 1
process_perf_data 1
retain_status_information 1
retain_nonstatus_information 1
notification_period 24x7
register 0
}
#定义Linux主机模板
define host{
name linux-server
use generic-host
check_period 24x7
check_interval 5
retry_interval 1
max_check_attempts 10
check_command check-host-alive
notification_period workhours
notification_interval 120
notification_options d,u,r
contact_groups admins
register 0
}
创建远程监控web1.cfg
vim /usr/local/nagios/etc/web1.cfg
define host{
use linux-server
host_name duangr-2
alias duangr-2
address 192.168.56.11
}
define service{
use local-service
host_name duangr-2
service_description Host Alive
check_command check-host-alive
}
define service{
use local-service
host_name duangr-2
service_description Users
check_command check_nrpe_args!check_users!5 10
}
define service{
use local-service
host_name duangr-2
service_description CPU
check_command check_nrpe_args!check_load!15,10,5 30,25,20
}
define service{
use local-service
host_name duangr-2
service_description Disk Root
check_command check_nrpe_args!check_disk!20% 10% /
}
define service{
use local-service
host_name duangr-2
service_description Disk /export/home
check_command check_nrpe_args!check_disk!20% 10% /export/home
}
define service{
use local-service
host_name duangr-2
service_description Procs Zombie
check_command check_nrpe_args!check_procs!5 10 Z
}
define service{
use local-service
host_name duangr-2
service_description Procs Total
check_command check_nrpe_args!check_procs_args!"-w400 -c600"
}
define service{
use local-service
host_name duangr-2
service_description Swap Usage
check_command check_nrpe_args!check_swap!20% 10%
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 下面是一些常用进程的监控,主要是云平台相关进程
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 监控crond进程
define service{
use local-service
host_name duangr-2
service_description PS: crond
check_command check_nrpe_args!check_procs_args!"-c1:1 -Ccrond"
}
;; 监控zookeeper进程
define service{
use local-service
host_name duangr-2
service_description PS: QuorumPeerMain
check_command check_nrpe_args!check_procs_args!"-c1:1 -Cjava -aserver.quorum.QuorumPeerMain"
}
;;监控storm的从节点进程
define service{
use local-service
host_name duangr-2
service_description PS: supervisor
check_command check_nrpe_args!check_procs_args!"-c1:1 -Cjava -adaemon.supervisor"
}
;; 监控storm的主节点进程
define service{
use local-service
host_name duangr-2
service_description PS: nimbus
check_command check_nrpe_args!check_procs_args!"-c1:1 -Cjava -adaemon.nimbus"
}
;; 监控MetaQ进程
define service{
use local-service
host_name duangr-2
service_description PS: MetaQ
check_command check_nrpe_args!check_procs_args!"-c1:1 -Cjava -ametamorphosis-server-w"
}
;; 监控Redis进程
define service{
use local-service
host_name duangr-2
service_description PS: redis-server
check_command check_nrpe_args!check_procs_args!"-c1:1 -Credis-server"
}
;; 监控hadoop主节点NameNode进程
define service{
use local-service
host_name duangr-2
service_description PS: NameNode
check_command check_nrpe_args!check_procs_args!"-c1:1 -Cjava -aserver.namenode.NameNode"
}
;; 监控hadoop主节点SecondaryNameNode进程
define service{
use local-service
host_name duangr-2
service_description PS: SecondaryNameNode
check_command check_nrpe_args!check_procs_args!"-c1:1 -Cjava -aserver.namenode.SecondaryNameNode"
}
;; 监控hadoop主节点ResourceManager进程
define service{
use local-service
host_name duangr-2
service_description PS: ResourceManager
check_command check_nrpe_args!check_procs_args!"-c1:1 -Cjava -aserver.resourcemanager.ResourceManager"
}
;; 监控hadoop从节点DataNode进程
define service{
use local-service
host_name duangr-2
service_description PS: DataNode
check_command check_nrpe_args!check_procs_args!"-c1:1 -Cjava -aserver.datanode.DataNode"
}
;;监控hadoop从节点NodeManager进程
define service{
use local-service
host_name duangr-2
service_description PS: NodeManager
check_command check_nrpe_args!check_procs_args!"-c1:1 -Cjava -aserver.nodemanager.NodeManager"
}
由于duangr-2是远程主机,因此使用check_nrpe_args命令来监控.
/etc/init.d/nagios restart
快速定位配置文件问题所在命令
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
3 被监控机安装软件 nagios-plugin nrpe
yum install -y openssl openssl-devel
groupadd nagios
useradd -g nagios -s /sbin/nologin nagios
tar -zxf nagios-plugins-2.1.6.tar.gz -C /usr/src/
cd /usr/src/nagios-plugins-2.1.6
./configure --prefix=/usr/local/nagios/ --with-nagios-user=nagios --with-nagios-group=nagios
make && make install
tar -zxf nrpe-2.14.tar.gz -C /usr/src/
cd /usr/src/nrpe-2.14
./configure
make all
make install-plugin
make install-daemon
make install-daemon-config
修改客户端的NRPE配置文件
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_sda2]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda2
command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%
command[check_home]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/mapper/VolGroup00-LogVol00
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 200 -c 300
command[check_ping81]=/usr/local/nagios/libexec/check_ping -H 10.155.0.1 -w 100.0,20% -c 500.0,60%
command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20 -c 10 -p /dev/hda1
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d" >> /etc/rc.local
netstat -lnupt |grep 5666
iptables -I INPUT -p tcp --dport 5666 -j ACCEPT
service iptables save
检查监控命令配置是否ok
/usr/local/nagios/libexec/check_nrpe -H localhost -c check_users -a 5 10
/usr/local/nagios/libexec/check_nrpe -H localhost -c check_load -a 15,10,5 30,25,20
/usr/local/nagios/libexec/check_nrpe -H localhost -c check_disk -a 20% 10% /
/usr/local/nagios/libexec/check_nrpe -H localhost -c check_procs -a 200 400 RSZDT
/usr/local/nagios/libexec/check_nrpe -H localhost -c check_swap -a 20% 10%
没有问题就可以用浏览器访问nagios了
二、安装Nagios
1、下载软件包并安装Nagios
Nagios-4.2.1:
Nagios-plugins-2.1.3:
NRPE-3.0.1:
官方安装文档:Nagios Quickstart Installation Guides
# yum install httpd php gcc glibc glibc-common gd gd-devel
# yum install unzip //编译所需,否则会报错。
# useradd -M -s /sbin/nologin nagios
# usermod -aG nagios apache
# tar zxvf nagios-4.2.1.tar.gz
# cd nagios-4.2.1/
# ./configure --with-nagios-user=nagios --with-nagios-group=nagios //先生成Makefile,否则make all会失败
# make all
# make install
# make install-init
# make install-config
# make install-commandmode
# make install-webconf
# vim /usr/local/nagios/etc/objects/contacts.cfg
email xxx@xxx.com //修改nagios警告信息的邮件地址
# htpasswd -c /usr/local/nagios/etc/htpasswd.users nagios //配置登录账号和密码
2、安装nagios-plugins插件
# tar zxvf nagios-plugins-2.1.3.tar.gz
# cd nagios-plugins-2.1.3/
# ./configure --with-nagios-user=nagios --with-nagios-group=nagios
# make
# make install
# chown -R nagios.nagios /usr/local/nagios/
# systemctl enable httpd
# systemctl start httpd
# systemctl enable nagios
# systemctl start nagios
# /etc/init.d/nagios checkconfig //检查nagios配置文件是否有错误,或使用以下命令检查:
# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
# firewall-cmd --zone=public --add-service=http --permanent
# firewall-cmd --reload
# systemctl restart firewalld
默认HTTP会有告警信息,解决办法:在/var/www/html目录新建一个空白index.html文件即可。
# touch /var/www/html/index.html
重启nagios和httpd服务,等待几分钟即恢复正常。
3、安装NRPE插件
# tar zxvf nrpe-3.0.1.tar.gz
# cd nrpe-3.0.1/
# yum install openssl-devel //解决checking for SSL headers... configure: error: Cannot find ssl headers错误问题
# ./configure --with-nrpe-user=nagios --with-nrpe-group=nagios --with-nagios-user=nagios --with-nagios-group=nagios --enable-command-args --enable-ssl
# make all //编译和安装nrpe
# make install-plugin
# make install-daemon
# make install-config //注:nrpe3.0以下请使用# make install-daemon-config
# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d //启动nrpe服务
# yum install net-tools
# netstat -tnpl //可以看到5666端口已处于监听状态,说明nrpe服务已启动
# echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d" >> /etc/rc.local
# chmod +x /etc/rc.d/rc.local //设置开机自启动,手动重启方法如下:
# pkill nrpe && /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
# vim /usr/local/nagios/etc/objects/commands.cfg //末尾增加以下内容
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
//允许check_nrpe命令定义nagios服务,-c后面带的$ARG1$参数是传给nrpe daemon执行的检测命令,它必须是nrpe.cfg中所定义的命令。
//自定义的Servers下的cfg配置文件中使用check_nrpe的时候要用”!”带上这个参数。
//可通过# /usr/local/nagios/libexec/check_nrpe -h查看插件的命令参数。
# mkdir /usr/local/nagios/etc/servers //创建servers监控配置文件集中存储目录
# vim /usr/local/nagios/etc/nagios.cfg //修改配置文件
cfg_dir=/usr/local/nagios/etc/servers //启用此规则,即默认读取处理此目录下的配置文件
4、添加客户端(Client被监控端)
1>、客户端安装NRPE和插件nagios-plugins
下载所需软件包
nagios-plugins-2.1.3.tar.gz
nrpe-3.0.1.tar.gz
新建用户
# useradd -M -s /sbin/nologin nagios
先安装nagios-plugins(NRPE依赖于nagios-plugins)
# tar zxvf nagios-plugins-2.1.3.tar.gz
# cd nagios-plugins-2.1.3
# ./configure --with-nagios-user=nagios --with-nagios-group=nagios
# make all
# make install
再安装NRPE
# yum install openssl-devel
# tar zxvf nrpe-3.0.1.tar.gz
# cd nrpe-3.0.1
# ./configure --with-nrpe-user=nagios --with-nrpe-group=nagios --with-nagios-user=nagios --with-nagios-group=nagios --enable-command-args --enable-ssl
# make all
# make install-plugin
# make install-daemon
# make install-config
# ls /usr/local/nagios/libexec/ //查看安装成功的NRPE插件,有check_nrpe说明安装成功
# vim /usr/local/nagios/etc/nrpe.cfg //配置nrpe
allowed_hosts=127.0.0.1,10.100.2.158 //添加服务端IP
dont_blame_nrpe=1 //把0改为1,允许命令参数
# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d //启动nrpe服务
为了便于NRPE服务的启动,可以定义一个/etc/init.d/nrpe脚本
# vim /etc/init.d/nrpe //输入以下内容:
#!/bin/bash
# chkconfig: 2345 88 12
# description: NRPE DAEMON

# SysV-style init script for the NRPE daemon.
# Accepts exactly one action argument: start, stop or restart.
# Exits 0 after a recognised action and 1 when the action is unknown.

NRPE=/usr/local/nagios/bin/nrpe
NRPECONF=/usr/local/nagios/etc/nrpe.cfg

case "$1" in
    start)
        echo -n "Starting NRPE daemon..."
        # Paths are quoted so they survive word splitting if they are ever changed.
        "$NRPE" -c "$NRPECONF" -d
        echo " done."
        ;;
    stop)
        echo -n "Stopping NRPE daemon..."
        # Only processes named nrpe that belong to the nagios user are signalled.
        pkill -u nagios nrpe
        echo " done."
        ;;
    restart)
        # Re-invoke this same script so stop/start logic lives in one place.
        "$0" stop
        sleep 2
        "$0" start
        ;;
    *)
        # A space after $0 keeps the script name separate from the action list.
        echo "Usage: $0 start|stop|restart"
        # Report the bad invocation to the caller instead of claiming success.
        exit 1
        ;;
esac
exit 0
# chmod a+x /etc/init.d/nrpe //赋予脚本执行权限,即可以通过systemctl或service执行启动,停止了。
# service nrpe start //启动nrpe
# echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d" >> /etc/rc.local
或# chkconfig nrpe on //设置为开机自启动
# netstat -tnlp //查看5666端口是否成功启动
测试监控主机和被监控设备之间的连通性(Server上):
#/usr/local/nagios/libexec/check_nrpe -H 10.100.2.200
NRPE v3.0.1 //通信成功
2>、Server监控端创建Client被监控端配置文件
# vim /usr/local/nagios/etc/servers/test.cfg //监控主机上新建Client端配置文件
define host{
use linux-server
host_name commission
alias commission
address 10.100.2.200
max_check_attempts 5
check_period 24x7
notification_interval 30
notification_period 24x7
}
define service{
use generic-service
host_name commission
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
define service{
use generic-service
host_name commission
service_description SSH
check_command check_ssh
notifications_enabled 0 ;disable notification
}
define service{
use generic-service
host_name commission
service_description CPU
check_command check_nrpe!check_cpu
notifications_enabled 1
}
define service{
use generic-service
host_name commission
service_description Physical Memory
check_command check_nrpe!check_mem
notifications_enabled 1
}
//可以以templates.cfg模板进行修改
关于check_cpu和check_mem自定义插件的使用方法(插件见附件):
2.1从官网下载需要的插件,注意修改+x执行权限和属性
2.2修改Client端配置:修改nrpe.cfg,增加以下内容
command[check_mem]=/usr/local/nagios/libexec/check_mem -w 10 -c 5
command[check_cpu]=/usr/local/nagios/libexec/check_cpu -w 80 -c 90
2.3重启nrpe服务
# pkill nrpe&&/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
2.4修改Server端配置:修改test.cfg,在define service中定义check_command
check_command check_nrpe!check_mem
check_command check_nrpe!check_cpu
3>、利用NSClient++监控远程Windows系统
下载插件包NSCP-0.4.4.19-x64.msi
在Windows客户端安装插件包:
# vim /usr/local/nagios/etc/objects/commands.cfg
# 'check_nt' command definition
define command{
command_name check_nt
command_line $USER1$/check_nt -H $HOSTADDRESS$ -p 12489 -s 123456 -v $ARG1$ $ARG2$
}
添加监控客户端:
# vim /usr/local/nagios/etc/nagios.cfg
cfg_file=/usr/local/nagios/etc/objects/windows.cfg //启用windows监控,如果有添加启用cfg_dir=/usr/local/nagios/etc/servers目录则需要注释掉windows.cfg,否则会有冲突
以windows.cfg为模板,添加新的windows服务器
# cp /usr/local/nagios/etc/objects/windows.cfg /usr/local/nagios/etc/servers/wintest.cfg
//修改配置中的host_name,IP地址等。
# /usr/local/nagios/libexec/check_nt -H 10.100.2.189 -p 12489 -s 123456 -v UPTIME
//测试客户端连通性(注意有特殊符号需要单引号),以下信息表示连接正常。
System Uptime - 20 day(s) 4 hour(s) 11 minute(s) |uptime=29051
# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg //测试配置
# systemctl restart nagios //重启nagios服务
默认check_nt!MEMUSE!-w 80 -c 90监控的是物理内存和虚拟内存的总和,单独监控物理内存方法:
1) 修改Client的nsclient.ini文件三个选项:
[/settings/NRPE/server]下的insecure = true、verify mode = none、allow arguments = true
修改完成后,通过# /usr/local/nagios/libexec/check_nrpe -H 10.100.2.189测试连通性,
I (0.4.4.19 2015-12-08) seem to be doing fine...表示连接正常,如果提示
CHECK_NRPE: Error - Could not complete SSL handshake.则表示未修改正确。
查看监控显示结果:
# /usr/local/nagios/libexec/check_nrpe -H 10.100.2.189 -p 5666 -c CheckMEM -a MaxWarn=80% MaxCrit=90% type=physical ShowAll
2) 修改Server的commands.cfg文件,定义物理内存监控服务
define command{
command_name check_winmem
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -p 5666 -c CheckMEM -a MaxWarn=$ARG1$% MaxCrit=$ARG2$% ShowAll=long type=physical
}
3) 修改Server的客户端配置文件xenapp.cfg,定义监控内容
define service{
use generic-service
host_name xenapp
service_description PhysicalMemory
check_command check_winmem!80!90
}
4) 检测配置文件是否有错误:# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
5) 重启nagios服务:# systemctl restart nagios
三、安装Cacti
1、下载软件包并安装
Cacti-0.8.8h:
Cacti-spine-0.8.8h:
官方安装手册:
配置安装环境:
#yum install httpd php php-mysql php-snmp php-xml mariadb mariadb-server
2、安装RRDtool工具
# yum install rrdtool
# rrdtool -h
3、安装SNMP服务
# yum install net-snmp net-snmp-utils
# systemctl enable snmpd
# systemctl start snmpd
4、安装cacti-spine(高效采集器)
# yum install net-snmp-devel mariadb-devel openssl-devel
# yum install autoconf automake binutils dos2unix gcc cpp libtool glibc-devel glibc-headers kernel-headers
# yum install wget patch
# tar zxvf cacti-spine-0.8.8h.tar.gz
# cd cacti-spine-0.8.8h/
# aclocal
# libtoolize --force
# autoheader
# autoconf
# automake
# ./configure
# make
# make install
# cp /usr/local/spine/bin/spine /usr/bin/spine
# cp /usr/local/spine/etc/spine.conf.dist /etc/spine.conf
# chown nagios.nagios /etc/spine.conf
# vim /etc/spine.conf
DB_Host localhost
DB_Database cacti
DB_User cactiuser
DB_Pass 123456
DB_Port 3306
# /usr/bin/spine //执行检查是否有错,安装完cacti后再执行
5、创建cacti数据库
启动数据库:
# systemctl enable mariadb
# systemctl start mariadb
# mysqladmin -uroot password 'rootpasswd' //Mariadb默认密码为空,先设置密码
# mysql -uroot -p //使用root权限账号登录
create database cacti; //创建数据库
grant all on cacti.* to cactiuser@'localhost' identified by '123456'; //授于本地登录权限
6、安装cacti程序
# tar zxvf cacti-0.8.8h.tar.gz
# mv cacti-0.8.8h /var/www/html/cacti
# mysql -u cactiuser -p cacti < /var/www/html/cacti/cacti.sql //导入cacti初始数据库结构
# chmod -R 777 /var/www/html/cacti/rra //授于rra和log文件夹777权限
# chmod -R 777 /var/www/html/cacti/log
# /usr/bin/spine //显示以下内容表示连接正常
SPINE:Using spine config file [/etc/spine.conf]
SPINE:Version 0.8.8h starting
SPINE:Time: 0.0455 s, Threads: 5, Hosts: 2
7、修改cacti全局配置文件
# vim /var/www/html/cacti/include/config.php
修改默认数据库名及连接数据库的用户名和密码
$database_default= "cacti";
$database_username= "cactiuser";
$database_password= "123456";
修改cacti系统时区,否则php会有告警日志信息
# vim /var/www/html/cacti/include/global.php //增加一行
date_default_timezone_set('Asia/Shanghai');
8、添加RRDtool抓图任务计划
# crontab -e
输入以下任务计划:
*/5 * * * * /usr/bin/php /var/www/html/cacti/poller.php >> /tmp/cacti_rrdtool.log 2>&1
9、配置SELinux
测试php模块是否正常,
#vim phpinfo.php //在html首页目录下
<?php phpinfo(); ?>
//测试完成后删除文件
-------------------------------------------------------------------------------------------
测试Mysql数据库的连接是否正常,
#vim mysqltest.php //html首页目录下,名称随意起
<?php
$link = mysql_connect('localhost', 'cactiuser', '123456');
if ($link) echo "connect success!";
else echo "connect fail!"; ?>
测试数据库连接性前,需要修改sebool值:在SELinux启用情况下,php连接mysql测试会失败
#getsebool -a |grep httpd_can_network_connect //查看httpd进程连接模式,默认为off
#setsebool -P httpd_can_network_connect=1 //启用连接后即可测试正常
------------------------------------------------------------------------------------------
配置SELinux上下文,否则访问cacti时会提示禁止访问:
# yum install policycoreutils-python //安装semanage工具,默认未安装
# ls -Zd cacti/ //查看当前cacti目录的上下文,为admin_home_t
# semanage fcontext -a -t httpd_sys_content_t '/var/www/html/cacti(/.*)?' //定义cacti目录的上下文规则
# restorecon -RFvv cacti/ //更改cacti目录的上下文
修改完成后重启httpd
# vim /usr/local/nagios/etc/objects/commands.cfg
# 'check_nt' command definition
define command{
command_name check_nt
command_line $USER1$/check_nt -H $HOSTADDRESS$ -p 12489 -s 123456 -v $ARG1$ $ARG2$
}
添加监控客户端:
# vim /usr/local/nagios/etc/nagios.cfg
cfg_file=/usr/local/nagios/etc/objects/windows.cfg //启用windows监控,如果有添加启用cfg_dir=/usr/local/nagios/etc/servers目录则需要注释掉windows.cfg,否则会有冲突
以windows.cfg为模板,添加新的windows服务器
# cp /usr/local/nagios/etc/objects/windows.cfg /usr/local/nagios/etc/servers/wintest.cfg
//修改配置中的host_name,IP地址等。
# /usr/local/nagios/libexec/check_nt -H 10.100.2.189 -p 12489 -s 123456 -v UPTIME
//测试客户端连通性(注意有特殊符号需要单引号),以下信息表示连接正常。
System Uptime - 20 day(s) 4 hour(s) 11 minute(s) |uptime=29051
# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg //测试配置
# systemctl restart nagios //重启nagios服务
默认check_nt!MEMUSE!-w 80 -c 90监控的是物理内存和虚拟内存的总和,单独监控物理内存方法:
1) 修改Client的nsclient.ini文件三个选项:
[/settings/NRPE/server]下的insecure = true、verify mode = none、allow arguments = true
修改完成后,通过# /usr/local/nagios/libexec/check_nrpe -H 10.100.2.189测试连通性,
I (0.4.4.19 2015-12-08) seem to be doing fine...表示连接正常,如果提示
CHECK_NRPE: Error - Could not complete SSL handshake.则表示未修改正确。
查看监控显示结果:
# /usr/local/nagios/libexec/check_nrpe -H 10.100.2.189 -p 5666 -c CheckMEM -a MaxWarn=80% MaxCrit=90% type=physical ShowAll
2) 修改Server的commands.cfg文件,定义物理内存监控服务
define command{
command_name check_winmem
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -p 5666 -c CheckMEM -a MaxWarn=$ARG1$% MaxCrit=$ARG2$% ShowAll=long type=physical
}
3) 修改Server的客户端配置文件xenapp.cfg,定义监控内容
define service{
use generic-service
host_name xenapp
service_description PhysicalMemory
check_command check_winmem!80!90
}
4) 检测配置文件是否有错误:# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
5) 重启nagios服务:# systemctl restart nagios