全部博文(167)
分类: 系统运维
2010-06-30 11:52:12
child of cluster
Starts a thread that periodically makes a request to the server, and restarts Resin if it fails.
This facility is used to increase server reliability - if there is a problem with the server
(perhaps from a deadlock or an exhaustion of resources), the server is restarted.
A failure occurs if a request to the url returns an HTTP status that is not 200.
Since the local process is restarted, it does not make sense to specify a url that does not
get serviced by the instance of Resin that has the ping configuration. Most configurations
use url's that specify 'localhost' as the host.
This pinging only catches some problems because it's running in the same process as
Resin itself. If the entire JDK freezes, this thread will freeze as well. Assuming the
JDK doesn't freeze, the PingThread will catch errors like deadlocks.
现要做的效果是:
应用自动登录数据库(只登录,不做任何操作);如果发现无法登录数据库,就发送告警邮件;如果一小时内
连接失败的次数超过 10 次,就执行封端口和重启应用的操作。
因为resin的启动脚本是自己书写,所以本脚本有一些变量是依赖于我自己的启动脚本
因为有一套运维系统,所以脚本中有一些返回值,给运维系统进行画图,告警等一系列操作,所以
可以不看这些输出值。
我在resin_home下建立了line_plugin目录并将一些jar插件放在里面;
本脚本使用了 MySQL 和 Oracle 两个数据库检测程序;为了便于以后增加功能,将它们分别打成
jar 包,其实两者的代码是一样的。
#!/bin/bash
# Author: Sky
#
# Database connectivity check for a Resin application.
#
# Usage: <this-script> <init-script-name>
#   <init-script-name> is the name of the application's start script under
#   /etc/init.d/. The JAVA_HOME=, RESIN_HOME= and SEARCH_STR= assignments
#   found in that script supply the JDK location, the Resin location and the
#   name of the Resin configuration file.
#
# Output: "appdbchk_<name>=<0|1>" lines on stdout. When the argument is
# missing or does not name an init script, "appdbchk_=0" is printed and the
# script exits with status 1.
PATH="/var/PROGRAM/MANAGEMENT/modules/xbash:/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin:"
LANG=zh_CN
export PATH LANG

# Jar files (under $RESINHOME/line_plugin) that perform the actual login test.
PLUGIN_MYSQL="app_chk_mysqldb.jar"
PLUGIN_ORACLE="app_chk_oracledb.jar"

###### Agent bootstrap output ###########
# Validate the argument BEFORE touching /etc/init.d/$1, so that a missing or
# wrong argument produces the agent's default value instead of errors from
# reading a directory or a non-existent file.
if [[ $# -eq 0 || ! -f "/etc/init.d/$1" ]]; then
  echo "appdbchk_=0"
  exit 1
fi

INIT_SCRIPT="/etc/init.d/$1"

# Each value is the text after the first "=" on the matching line(s).
JAVAHOME=$(awk -F= '/JAVA_HOME=/ {print $2}' "$INIT_SCRIPT")
RESINHOME=$(awk -F= '/RESIN_HOME=/ {print $2}' "$INIT_SCRIPT")
# SEARCH_STR may be written with double quotes in the init script; drop them.
CONF_NAME=$(awk -F= '/SEARCH_STR=/ {print $2}' "$INIT_SCRIPT" | sed 's/"//g')
#################################
########### Environment initialisation ############
# install_jdbc_drivers
#   Makes sure the MySQL and Oracle JDBC driver jars are present in the JDK's
#   extension directory ($JAVAHOME/jre/lib/ext), copying them from
#   $RESINHOME/lib when they are missing. An existing jar is never
#   overwritten.
# Globals:  JAVAHOME (read), RESINHOME (read)
# Returns:  always 0; this is a best-effort step and a failed copy is only
#           reported on stderr so the check itself still runs.
install_jdbc_drivers() {
  local jar

  # With either variable empty the paths would resolve to /jre/lib/ext and
  # /lib, which is never what is wanted.
  if [[ -z "$JAVAHOME" || -z "$RESINHOME" ]]; then
    return 0
  fi

  for jar in mysql-connector-java-5.1.10-bin.jar ojdbc14.jar; do
    if [[ ! -e "$JAVAHOME/jre/lib/ext/$jar" ]]; then
      cp -- "$RESINHOME/lib/$jar" "$JAVAHOME/jre/lib/ext/" \
        || echo "warning: could not install $jar into $JAVAHOME/jre/lib/ext" >&2
    fi
  done
  return 0
}
install_jdbc_drivers
#################################
########## DB URL OPTION ##########
# Connection settings of the data source being checked. They start empty and
# are filled in from the Resin configuration file further down.
DBURL=""
DBUSER=""
DBPASSWD=""
#################################
#############
# Host info #
#############
# extract_ipv4
#   Reads `ifconfig` output on stdin and prints the IPv4 address of every
#   "inet" line. Both output layouts are understood:
#     inet addr:192.168.1.2  Bcast:...     (older net-tools)
#     inet 192.168.1.2  netmask ...        (newer net-tools)
#   "inet6" lines are ignored.
extract_ipv4() {
  awk '
    /inet addr/  { split($2, tmp, ":"); print tmp[2]; next }
    $1 == "inet" { print $2 }
  '
}

# Interface name: last column of the last line printed by `netstat -rn`.
INTF=$(netstat -rn | tail -n 1 | awk '{print $NF}')
# When no interface name was found, ifconfig is called without an argument
# (as before), which lists every active interface.
IP=$(ifconfig ${INTF:+"$INTF"} | extract_ipv4)
HOST_NAME=$(hostname --short)
###########
####################
# Mail environment #
####################
# The addresses and server names are blank in this copy of the script and
# must be filled in before alerts can be delivered.
# NOTE(review): CHARTSET is left commented out, so the charset passed to the
# mail client by the alerting code is empty - confirm whether that is wanted.
#CHARTSET="zh_CN."
MAIL_CLIENT=""   # recipient(s) of the alert mail
MAIL_SENDER=""   # envelope "from" address (was an unterminated string)
MAIL_SERVER=""   # SMTP server; chosen below from the host's network
case "$IP" in
  # NOTE(review): "202..*" only matches addresses that literally start with
  # "202.." - part of this pattern looks redacted; verify the intended range.
  192.168.230.* | 192.168.1[0-1].* | 192.168.238.* | 202..*)
    MAIL_SERVER=""
    ;;
  *)
    MAIL_SERVER=""
    ;;
esac
##########
cd "$RESINHOME" || echo "warning: cannot cd to RESINHOME '$RESINHOME'" >&2

# First script argument (the init script name); used in mail subjects.
# Captured here because "$1" means something else inside functions.
APP_NAME=${1-}

open=0   # 1 while the scan is inside a <database> element
topen=0  # how many of url/user/password were found for the current element
wlog=0   # log-writing flag; not used by this section, kept for compatibility
dbtap=0  # output of the most recent plugin run

# xml_tag_value TAG TEXT
#   Prints what stands between <TAG> and </TAG> in TEXT, with surrounding
#   whitespace removed.
xml_tag_value() {
  local tag=$1 text=$2
  text=${text#*"<$tag>"}
  text=${text%%"</$tag>"*}
  text=${text#"${text%%[![:space:]]*}"}
  text=${text%"${text##*[![:space:]]}"}
  printf '%s\n' "$text"
}

# db_failure_log
#   Prints the path of the failure-counter file for the current DBURL/DBUSER:
#   $RESINHOME/line_plugin/<last "/" component of DBURL>_<DBUSER>.log
#   (Braces matter: "$dbname_$DBUSER" would expand a variable named
#   "dbname_", which is what made the old file name just "<DBUSER>.log".)
db_failure_log() {
  local dbname=${DBURL##*/}
  printf '%s\n' "$RESINHOME/line_plugin/${dbname}_${DBUSER}.log"
}

# record_db_failure LOGFILE
#   Counts one more failed login in LOGFILE and prints the new count.
#   LOGFILE holds "<YYYYMMDDHH> <count>"; a missing file, a file written in
#   another hour, or unreadable content starts the count again, so the
#   printed number is "failures within the current clock hour".
record_db_failure() {
  local logfile=$1 window saved_window="" saved_count=0
  window=$(date +%Y%m%d%H)
  if [[ -e "$logfile" ]]; then
    read -r saved_window saved_count < "$logfile" || true
  fi
  if [[ "$saved_window" != "$window" || ! "$saved_count" =~ ^[0-9]+$ ]]; then
    saved_count=0
  fi
  saved_count=$((10#$saved_count + 1))
  echo "$window $saved_count" > "$logfile"
  echo "$saved_count"
}

# reset_db_failures LOGFILE
#   Stores a zero count for the current hour (called after a successful login).
reset_db_failures() {
  echo "$(date +%Y%m%d%H) 0" > "$1"
}

# send_db_alert SUBJECT BODY
#   Writes BODY to /tmp/.dbdisconnection and mails that file with `nail`.
# Globals: CHARTSET, MAIL_SENDER, MAIL_SERVER, MAIL_CLIENT (read)
send_db_alert() {
  local subject=$1 body=$2
  echo "$body" > /tmp/.dbdisconnection
  # MAIL_CLIENT is deliberately unquoted so that it may list several
  # space-separated recipients.
  # shellcheck disable=SC2086
  env MAILRC=/dev/null charset="$CHARTSET" from="$MAIL_SENDER" smtp="$MAIL_SERVER" \
    nail -n -s "$subject" $MAIL_CLIENT < /tmp/.dbdisconnection
}

# check_database
#   Runs the login-test plugin for DBURL/DBUSER/DBPASSWD, prints the
#   "appdbchk_<name>=<0|1>" line for the monitoring agent (1 = failed), and
#   maintains the per-hour failure counter plus alert mails.
#   A run counts as failed when the plugin output contains "DB is BAD".
# NOTE(review): the password is passed on the java command line because that
#   is the plugin's interface; it is visible in the process list.
# NOTE(review): if java cannot be started the output is empty and the
#   database is reported as healthy - confirm that this is acceptable.
check_database() {
  local plugin appdbname logfile failures

  if [[ "$DBURL" == *mysql* ]]; then
    plugin=$PLUGIN_MYSQL
  else
    plugin=$PLUGIN_ORACLE
  fi
  # stdin is detached so the plugin cannot consume the configuration file
  # that the calling loop is reading.
  dbtap=$("$JAVAHOME/bin/java" -jar "$RESINHOME/line_plugin/$plugin" \
    "$DBURL" "$DBUSER" "$DBPASSWD" < /dev/null)

  appdbname=${DBURL##*:} # text after the last ":" of the URL
  logfile=$(db_failure_log)

  if [[ "$dbtap" == *"DB is BAD"* ]]; then
    echo "appdbchk_$appdbname=1"
    failures=$(record_db_failure "$logfile")
    if ((failures > 10)); then
      ######### send mail ############
      send_db_alert "$HOST_NAME($IP)${APP_NAME}数据库连接失败超10次" "连接$DBURL失败已超十次"
      ################
      #### code for blocking the port / restarting the application goes here
      ################
    else
      ######### send mail ############
      send_db_alert "$HOST_NAME($IP)${APP_NAME}数据库连接失败" "连接$DBURL失败"
    fi
  else
    echo "appdbchk_$appdbname=0"
    reset_db_failures "$logfile"
  fi
}

# Walk through the Resin configuration and test every data source for which
# a url, a user and a password were found.
# NOTE(review): the tag names below (<database>, <url>, <user>, <password>)
#   were missing from the published copy of this script and are reconstructed
#   from the usual Resin <database> layout - verify against the real
#   configuration file.
CONF_FILE="$RESINHOME/conf/$CONF_NAME"
if [[ -f "$CONF_FILE" ]]; then
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" == *"<database>"* || "$line" == *"<database "* ]]; then
      open=1
      topen=0 # never mix credentials of two different elements
    fi
    if [[ "$line" == *"</database>"* ]]; then
      open=0
    fi
    if ((open == 1)); then
      if [[ "$line" == *"<url>"* ]]; then
        DBURL=$(xml_tag_value url "$line")
        topen=$((topen + 1))
      fi
      if [[ "$line" == *"<user>"* ]]; then
        DBUSER=$(xml_tag_value user "$line")
        topen=$((topen + 1))
      fi
      if [[ "$line" == *"<password>"* ]]; then
        DBPASSWD=$(xml_tag_value password "$line")
        topen=$((topen + 1))
      fi
    fi
    if ((topen == 3)); then
      check_database
      topen=0
    fi
  done < "$CONF_FILE"
else
  echo "warning: resin configuration '$CONF_FILE' not found" >&2
fi
|