分类: Oracle
2010-02-04 13:10:19
From: ITPUB.NET Link: 【IT168技术文档】 1. 背景概述 2. 算法
3. Shell代码 #!/bin/sh #设置alert日志文件的环境变量 ORIGINAL_ALERT="/arraybill/home/oracle/app/oracle/admin/hnbill/bdump/alert_hnbill.log" #判断输入的参数:天 if [ $# -ne 1 ] then echo "参数不对,用法: sh check_alert.sh 3" exit 1 fi if [ $1 -lt 0 ] then echo "对不起,请输入一个非负整数" exit 1 fi #查询参数对应时间以前的系统时间 sh dated_alert.sh $1 >/dev/null #分解时间格式,注意,其中天可能是一位,也可能是两位,月份是3位,月份与天组成的字符串长度固定 str_day=$(sed 1d tmp_date.log|awk '{if(substr($1,1,1) == 0) print substr($1,2,1);else print substr($1,1,2) }') str_length=$(echo $str_day|awk '{print length($1)}') #将三个字母表示的月份分解成首字母大写,其他小写 str_month=$(sed 1d tmp_date.log|awk '{print substr($1,4,3)}'|sed '1,$y/[JFMASODN]ABCDEFGHIJKLMNOPQRSTUVWXYZ/[JFMASODN]abcdefghijklmnopqrstuvwxyz/') if [ $str_length -eq 1 ] then echo "awk '/""$str_month $str_day""/{print NR}' "$ORIGINAL_ALERT"" >tmp.sh #echo "$str_month $str_day" fi if [ $str_length -eq 2 ] then echo "awk '/""$str_month $str_day""/{print NR}' "$ORIGINAL_ALERT"" >tmp.sh #echo "$str_month $str_day" fi chmod 777 tmp.sh chmod 777 tmp.sh sh tmp.sh |awk 'NR==1{print $1}' >rownum.lst #获取要处理的alert日志的开始行号和结束行号 alert_row_begin=$(cat rownum.lst |awk '{print $1}') alert_row_end=$(wc -l $ORIGINAL_ALERT|awk '{print $1}') echo "awk 'NR==$alert_row_begin,NR==$alert_row_end{print \$0}' "$ORIGINAL_ALERT"" >tmp.sh #根据行号范围,截取alert日志内容,并保存到临时文件tmp_alert.tmp中 sh tmp.sh >tmp_alert.tmp echo echo "-----------------------------------------------" echo "错误汇总 开始时间:$begin_date_time" echo "-----------------------------------------------" echo echo "数量 错误号" echo "---- -------------------" grep ORA- tmp_alert.tmp |awk '{print substr($0,0,9)}'|sort -nr| uniq -c|sort -nr #如果有600错误,调用600错误的处理脚本 if [ $(awk '/ORA-00600/{print NR}' tmp_alert.tmp|wc -l|awk '{print $1}') -gt 0 ] then sh check_00600.sh fi #如果有07445错误,调用07445错误的处理脚本 if [ $(awk '/ORA-07445/{print NR}' tmp_alert.tmp|wc -l|awk '{print $1}') -gt 0 ] then sh check_07445.sh fi #如果有 000060 错误,调用000060错误的处理脚本 if [ $(awk '/ORA-000060/{print NR}' tmp_alert.tmp|wc -l|awk '{print $1}') -gt 0 ] then sh check_000060.sh fi #如果有 01555 错误,调用01555错误的处理脚本 if [ $(awk '/ORA-01555/{print NR}' tmp_alert.tmp|wc -l|awk '{print $1}') -gt 0 ] then sh check_01555.sh fi echo #删除调用过程中产生的临时文件 rm -f tmp_3_line.sh rm -f tmp_date.log rm -f tmp.sh rm -f ora01555_trc.log rm -f ora006000_trc.log rm -f ora07445_trc.log rm -f ora000060_trc.log rm -f trc_tmp rm -f tmp_alert.tmp rm -f ora006000_rownum.log rm -f rownum.lst
echo "set heading off" >get_date.sql echo "set feedback off" >>get_date.sql echo "set term off" >>get_date.sql echo "spool tmp_date.log" >>get_date.sql echo "select sysdate-"$1" from dual;" >>get_date.sql echo "spool off" >>get_date.sql echo "exit" >>get_date.sql export NLS_LANG=american sqlplus "/as sysdba" @get_date.sql <<EOF EOF rm -f get_date.sql
处理ORA-00600错误的脚本check_00600.sh echo echo "----------------------" echo "ORA-00600错误跟踪信息" echo "----------------------" awk '/ORA-00600/{print NR}' tmp_alert.tmp|awk '{print "awk ""'\''""NR=="$1-1" {print ""\"mget \"""substr($4,1,length($4)-1)}""'\''"" tmp_alert.tmp"}' >tmp1.sh chmod 777 tmp1.sh chmod 777 tmp1.sh echo echo sh tmp1.sh |sort -r| uniq >ora006000_trc.log #对所有的ORA-00600跟踪文件进行处理,去掉重复的文件,判断文件是否存在,并生成ftp可用的格式 echo "" >trc_tmp for trc_file in $(cat ora006000_trc.log|awk '{print $2}') do if [ -f $trc_file ] then echo "mget $trc_file" >>trc_tmp else echo "找不到跟踪文件: $trc_file" >>trc_tmp fi done cat trc_tmp|sort -r echo echo echo >tmp_3_line.sh chmod 777 tmp_3_line.sh chmod 777 tmp_3_line.sh #对于每个ORA-00600,打印时间戳、跟踪文件和具体报错信息 for tmp_row_num in $(awk '/ORA-00600/{print NR}' tmp_alert.tmp) do let tmp_time=$tmp_row_num-2 let tmp_trc=$tmp_row_num-1 echo "awk 'NR=="$tmp_time"{print \$0}' tmp_alert.tmp" >>tmp_3_line.sh echo "awk 'NR=="$tmp_trc"{print \$0}' tmp_alert.tmp" >>tmp_3_line.sh echo "awk 'NR=="$tmp_row_num"{print \$0}' tmp_alert.tmp" >>tmp_3_line.sh echo "echo" >>tmp_3_line.sh done sh tmp_3_line.sh echo echo #错误信息提示 echo "---------------" echo "关于ORA-00600" echo "---------------" oerr ora 00600 echo
$ sh check_alert.sh 2 ---------------------------------------------- 错误汇总 开始时间:Wed Sep 10 00:11:39 2008 ----------------------------------------------- 数量 错误号 ---- ------------------- 3 ORA-00600 1 ORA-01555 ---------------------- ORA-00600 错误跟踪信息 ---------------------- mget /arraybill/home/oracle/app/oracle/admin/hnbill/udump/hnbill_ora_5984.trc 找不到跟踪文件: /arraybill/home/oracle/app/oracle/admin/hnbill/udump/hnbill_ora_10650.trc Fri Sep 10 11:55:42 2008 Errors in file /arraybill/home/oracle/app/oracle/admin/hnbill/udump/hnbill_ora_15196.trc: ORA-00600: internal error code, arguments: [qerrmObnd1], [932], [ORA-00932: inconsistent datatypes: expected got Fri Sep 10 11:56:16 2008 Errors in file /arraybill/home/oracle/app/oracle/admin/hnbill/udump/hnbill_ora_15196.trc: ORA-00600: internal error code, arguments: [qerrmObnd1], [932], [ORA-00932: inconsistent datatypes: expected got Tue Sep 10 19:48:49 2008 Errors in file /arraybill/home/oracle/app/oracle/admin/hnbill/udump/hnbill_ora_5984.trc: ORA-00600: internal error code, arguments: [729], [1200], [space leak], [], [], [], [], [] --------------- 关于ORA-00600 --------------- 00600, 00000, "internal error code, arguments: [%s], [%s], [%s], [%s], [%s], [%s], [%s], [%s]" // *Cause: This is the generic internal error number for Oracle program // exceptions. This indicates that a process has encountered an // exceptional condition. // *Action: Report as a bug - the first argument is the internal error number ------------------------- ORA-01555 错误跟踪信息 ------------------------- Thu Sep 11 04:38:18 2008 ORA-01555 caused by SQL statement below (Query Duration=12429 sec, SCN: 0x0240.ba061afa): Thu Sep 11 04:38:18 2008 SELECT * FROM BB_SERVICE_RELATION_T WHERE CITY_CODE = :B4 AND SERVICE_KIND = :B3 AND IF_VALID = 1 AND SERVING_STATUS <> 10 AND SERVING_STATUS <> 20 AND (PRESERVE02 <> 1 OR PRESERVE02 IS NULL) AND MOD(USER_ID,:B2 ) = :B1 - 1 --------------- 关于ORA-01555 --------------- 01555, 00000, "snapshot too old: rollback segment number %s with name \"%s\" too small" // *Cause: rollback records needed by a reader for consistent read are // overwritten by other writers // *Action: If in Automatic Undo Management mode, increase undo_retention // setting. Otherwise, use larger rollback segments 4. 后处理与展望 |