昨天在znh,在红旗安全操作系统4.0(也就是Asianux3.0sp2)上,安装IBM tivoli Monitoring Agent for Oracle V06.20.01.00 for Linux Intel R2.6 (32 bit)的时候,出现启动后很快退出的情况,退出时会有一些库文件的提示:
[root@SOS ~]# cd /opt/IBM/ITM/bin/
[root@SOS bin]# ./itmcmd agent start or
CandleDBAgent : installer level 620 / 300.
itmcmd agent : Removing stale SOS_or.run entry: orcl=26653.
itmcmd agent : considering servers: orcl.
kddexec: /opt/IBM/ITM already local disk
Collector and Agent started for orcl
[root@SOS bin]#(几分钟后) *** glibc detected *** /opt/IBM/ITM/tmp/SOS_or_orcl/kddsignl: corrupted double-linked list: 0x09a5bf28 ***
======= Backtrace: =========
/lib/libc.so.6[0x3eaf19]
/lib/libc.so.6(cfree+0x90)[0x3eebc0]
/opt/IBM/ITM/tmp/SOS_or_orcl/kddsignl(u_free+0xd3)[0x8052951]
/opt/IBM/ITM/tmp/SOS_or_orcl/kddsignl(_Z13KillProcessesi+0x3ff)[0x804d139]
/opt/IBM/ITM/tmp/SOS_or_orcl/kddsignl(main+0x429)[0x804bdb5]
/lib/libc.so.6(__libc_start_main+0xdc)[0x397e8c]
/opt/IBM/ITM/tmp/SOS_or_orcl/kddsignl(__gxx_personality_v0+0x2f9)[0x804b8f1]
======= Memory map: ========
00364000-0037e000 r-xp 00000000 fd:00 1343582 /lib/ld-2.5.so
0037e000-0037f000 r-xp 00019000 fd:00 1343582 /lib/ld-2.5.so
0037f000-00380000 rwxp 0001a000 fd:00 1343582 /lib/ld-2.5.so
00382000-004c0000 r-xp 00000000 fd:00 1343583 /lib/libc-2.5.so
004c0000-004c2000 r-xp 0013e000 fd:00 1343583 /lib/libc-2.5.so
004c2000-004c3000 rwxp 00140000 fd:00 1343583 /lib/libc-2.5.so
004c3000-004c6000 rwxp 004c3000 00:00 0
004c8000-004ed000 r-xp 00000000 fd:00 1343590 /lib/libm-2.5.so
004ed000-004ee000 r-xp 00024000 fd:00 1343590 /lib/libm-2.5.so
004ee000-004ef000 rwxp 00025000 fd:00 1343590 /lib/libm-2.5.so
004f1000-004f3000 r-xp 00000000 fd:00 1343584 /lib/libdl-2.5.so
004f3000-004f4000 r-xp 00001000 fd:00 1343584 /lib/libdl-2.5.so
004f4000-004f5000 rwxp 00002000 fd:00 1343584 /lib/libdl-2.5.so
004f7000-0050a000 r-xp 00000000 fd:00 1343585 /lib/libpthread-2.5.so
0050a000-0050b000 r-xp 00012000 fd:00 1343585 /lib/libpthread-2.5.so
0050b000-0050c000 rwxp 00013000 fd:00 1343585 /lib/libpthread-2.5.so
0050c000-0050e000 rwxp 0050c000 00:00 0
00525000-00530000 r-xp 00000000 fd:00 1146897 /lib/libgcc_s-4.1.2-20080825.so.1
00530000-00531000 rwxp 0000a000 fd:00 1146897 /lib/libgcc_s-4.1.2-20080825.so.1
00540000-0054f000 r-xp 00000000 fd:00 1146896 /lib/libresolv-2.5.so
0054f000-00550000 r-xp 0000e000 fd:00 1146896 /lib/libresolv-2.5.so
00550000-00551000 rwxp 0000f000 fd:00 1146896 /lib/libresolv-2.5.so
00551000-00553000 rwxp 00551000 00:00 0
0088f000-00896000 r-xp 00000000 fd:00 1343586 /lib/librt-2.5.so
00896000-00897000 r-xp 00006000 fd:00 1343586 /lib/librt-2.5.so
00897000-00898000 rwxp 00007000 fd:00 1343586 /lib/librt-2.5.so
0090d000-00920000 r-xp 00000000 fd:00 1146895 /lib/libnsl-2.5.so
00920000-00921000 r-xp 00012000 fd:00 1146895 /lib/libnsl-2.5.so
00921000-00922000 rwxp 00013000 fd:00 1146895 /lib/libnsl-2.5.so
00922000-00924000 rwxp 00922000 00:00 0
00aea000-00af3000 r-xp 00000000 fd:00 1343599 /lib/libcrypt-2.5.so
00af3000-00af4000 r-xp 00008000 fd:00 1343599 /lib/libcrypt-2.5.so
00af4000-00af5000 rwxp 00009000 fd:00 1343599 /lib/libcrypt-2.5.so
00af5000-00b1c000 rwxp 00af5000 00:00 0
08048000-08067000 r-xp 00000000 fd:00 819816 /opt/IBM/ITM/tmp/SOS_or_orcl/kddsignl
08067000-0806a000 rwxp 0001f000 fd:00 819816 /opt/IBM/ITM/tmp/SOS_or_orcl/kddsignl
09a4f000-09a92000 rwxp 09a4f000 00:00 0 [heap]
40000000-40001000 r-xp 40000000 00:00 0 [vdso]
40001000-40002000 rwxp 40001000 00:00 0
40002000-4004b000 r-xp 00000000 fd:00 852118 /opt/IBM/ITM/tmaitm6/li6263/lib/libkbb.so
4004b000-40054000 rwxp 00048000 fd:00 852118 /opt/IBM/ITM/tmaitm6/li6263/lib/libkbb.so
40054000-40055000 rwxp 40054000 00:00 0
40055000-40065000 r-xp 00000000 fd:00 852143 /opt/IBM/ITM/tmaitm6/li6263/lib/libknsnls2.so
40065000-40072000 rwxp 0000f000 fd:00 852143 /opt/IBM/ITM/tmaitm6/li6263/lib/libknsnls2.so
40072000-40140000 r-xp 00000000 fd:00 852146 /opt/IBM/ITM/tmaitm6/li6263/lib/libkra.so
40140000-40165000 rwxp 000cd000 fd:00 852146 /opt/IBM/ITM/tmaitm6/li6263/lib/libkra.so
40165000-40167000 rwxp 40165000 00:00 0
4017f000-40180000 rwxp 4017f000 00:00 0
40180000-4022f000 r-xp 00000000 fd:00 491613 /usr/lib/libstdc++.so.5.0.7
4022f000-40234000 rwxp 000ae000 fd:00 491613 /usr/lib/libstdc++.so.5.0.7
40234000-4023a000 rwxp 40234000 00:00 0
4023a000-40312000 r-xp 00000000 fd:00 852128 /opt/IBM/ITM/tmaitm6/li6263/lib/libkcuuc32.so
4031200
|
通过日志查看,发现原因是/opt/IBM/ITM/li6263/bin/kor10col在执行的时候发现库文件中某个变量sqlcxt没定义。
/opt/IBM/ITM/li6263/bin/kor10col: symbol lookup error: /opt/IBM/ITM/li6263/bin/kor10col: undefined symbol: sqlcxt
|
首先想到的是用ldd看一下此命令所调用的库文件是否齐全。不过很遗憾,非常齐全,所有的so文件都能找到。
ldd /opt/IBM/ITM/li6263/bin/kor10col
linux-gate.so.1 => (0x40000000)
libkbb.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkbb.so (0x40002000)
libknsnls2.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libknsnls2.so (0x40055000)
libkra.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkra.so (0x40072000)
libnsl.so.1 => /lib/libnsl.so.1 (0x0090d000)
libpthread.so.0 => /lib/libpthread.so.0 (0x004f7000)
librt.so.1 => /lib/librt.so.1 (0x0088f000)
libdl.so.2 => /lib/libdl.so.2 (0x004f1000)
libcrypt.so.1 => /lib/libcrypt.so.1 (0x00aea000)
libclntsh.so.10.1 => /usr/lib/libclntsh.so.10.1 (0x40180000)
libm.so.6 => /lib/libm.so.6 (0x004c8000)
libstdc++.so.5 => /usr/lib/libstdc++.so.5 (0x40183000)
libgcc_s.so.1 => /lib/libgcc_s.so.1 (0x00525000)
libc.so.6 => /lib/libc.so.6 (0x00382000)
libkcuuc32.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkcuuc32.so (0x4023c000)
libkcutu32.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkcutu32.so (0x40329000)
libkcudt32.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkcudt32.so (0x40336000)
libkcui18n32.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkcui18n32.so (0x40c9c000)
libkcuio32.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkcuio32.so (0x40dcc000)
libkhdxcl1.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkhdxcl1.so (0x40dd6000)
libkdc.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkdc.so (0x40e52000)
libkde.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkde.so (0x40ec7000)
libkglbase.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkglbase.so (0x40f19000)
libkdsfilt.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkdsfilt.so (0x40fbe000)
/lib/ld-linux.so.2 (0x00364000)
libkgl01p1.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkgl01p1.so (0x40ff0000)
libkgl01p2.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkgl01p2.so (0x4100d000)
libkdh.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkdh.so (0x41054000)
libresolv.so.2 => /lib/libresolv.so.2 (0x00540000)
libkns.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkns.so (0x4107b000)
libgsk7iccs.so => /usr/lib/libgsk7iccs.so (0x410e0000)
libkolops.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkolops.so (0x410ea000)
libkolccl.so => /opt/IBM/ITM/tmaitm6/li6263/lib/libkolccl.so (0x410ee000)
|
直接执行这个kor10col命令的时候,是没有报错的。现象就是这样了。我想会不会还是因为环境变量问题:
在shell里面,kor10col可以从某个库文件中取得sqlcxt变量,而在用itmcmd命令执行时,因为itmcmd从特定配置文件中取环境变量,造成跟shell不一样,所以找不到sqlcxt变量? 这个问题好验证,通过修改itmcmd第一行,
在/bin/ksh后添加-x,
让脚本打印执行过程,最后判断是/opt/IBM/ITM/li6263/bin/kddexec.sh此脚本调用了kor10col,所以在此文件中特定位置添加ldd查看kor10col是否能够正确调用到所有库文件,并且执行一下此文件:
case $Action in
(start) cv= # Collector version-suffix
echo "++++++++++++++++++++++++++++++++++++++++++++"
ldd /opt/IBM/ITM/li6263/bin/kor10col
/opt/IBM/ITM/li6263/bin/kor10col
echo "++++++++++++++++++++++++++++++++++++++++++++"
|
测试结果是:用itmcmd命令执行时,ldd kor10col可以正确找到所有so库文件,而且跟shell里面是一模一样的。并且单独执行此命令时,也是提示undefined symbol: sqlcxt,那肯定是某个so库文件中的确没有定义sqlcxt变量。其实在shell里面单独执行kor10col时,因为某个kddproc.ctl文件没有找到,所以没有执行到缺少变量sqlcxt的地方。
[root@SOS ~]# /opt/IBM/ITM/li6263/bin/kor10col
CAT2015I (011444) OMA for Distributed Database. Data Collector
CAT2020I (011444) Version 3.1.0.0 - June 15, 1999
CAT2025I (011444) (c) Copyright Candle Corporation 1995, 1996, 1997, 1998, 1999.
CAT2030I (011444) All rights reserved.
CAT2035I (011444) Time = 2009/09/28 01:14:44
kddcoll.c,v 4 2021:smyu Exp $ 20 2211:3 stg.lux.
kddcdiu.cpp, 3 2021:smyu Exp $ 20 2211:3 stg.lux.
kddcomm.c,v 2 2016:meva Exp $ 20 2211:3 stg_.lnx
kddcomm2.cpp 3 2013:jerr Exp $ 20 2211:3 stg_.lnx
kddexpwh.cpp 2 2016:meva Exp $ 20 2211:3 stg.lux.
kddfext.cpp, 2 2016:meva Exp $ 20 2211:3 stg.lux.
kddgchn.cpp, 2 2016:meva Exp $ 20 2211:3 stg_.lnx
kddif.c,v 2 2016:meva Exp $ 20 2211:3 stg.lux.
kddlex.c,v 2 2016:meva Exp $ 20 2211:3 stg.lux.
kddmemz.cpp, 2 2016:meva Exp $ 20 2211:3 stg.lux.
kddpars.c,v 3 2022:smyu Exp $ 20 2211:3 stg.lux.
kddsqlz.c,v 2 2016:meva Exp $ 20 2211:3 stg.lux.
kddstr.c,v 2 2016:meva Exp $ 20 2211:3 stg_.lnx
kdduexe.cpp, 2 2016:meva Exp $ 20 2211:3 stg_.lnx
kddufil.cpp, 2 2016:meva Exp $ 20 2211:3 stg_.lnx
kddutil.c,v 2 2016:meva Exp $ 20 2211:3 stg_.lnx
kddutl2.cpp, 3 2017:smyu Exp $ 20 2211:3 stg_.lnx
koysmon.cpp, 2 2016:meva Exp $ 20 2211:3 stg.lux.
CML1526I (011444) Stack: soft limit: 10485760 hard limit: -1
UOI0055E KDDUTIL(1282) Could not find a kddproc.ctl file
GGN1505S (011444) Time = 2009/09/28 01:14:44, Collector ended
COL9999S (011444) Exiting collector at Mon Sep 28 01:14:44 2009
|
问题到这个地方就卡住了,无法分析了。kor10col所依赖的库文件,肯定某个里面缺少变量sqlcxt,但是哪个?无从查起。
峰回路转是装tivoli的哥们说在红帽上可以的。我直接装了一个红帽5的环境,装上此tivoli monitor for oracle,执行过程中就没有报错。执行ldd的时候就发现不一样的地方:
红帽5:
ldd /opt/IBM/ITM/li6263/bin/kor10col
libclntsh.so.10.1 => /opt/app/oracle/product/10.1.0/db_1/lib/libclntsh.so.10.1 (0x40170000)
SecurityOS4.0(Asianux3SP2):
ldd /opt/IBM/ITM/li6263/bin/kor10col
libclntsh.so.10.1 => /usr/lib/libclntsh.so.10.1 (0x40180000)
|
查看红帽上并没有/usr/lib/libclntsh.so.10.1,Asianux上此文件属于php-oci8-5.1.6-15.1AX,将此包删除。问题解决。
[root@fan3838 /]# rpm -qf /usr/lib/libclntsh.so.10.1
php-oci8-5.1.6-15.1AX
|
后记:当看到php-oci8的时候,我就想起以前好像遇到过一次,当时是在IBM钻石大厦和IBM的哥们一起测试,最后也是对比找到问题,也是删除此软件包。赶紧查邮件,发现07年1月,“河南地税使用红旗DC4.1运行IBM Tivoli Monitoring 6.1,其中for oracle的agent运行不正常。经过和IBM工程师共同测试,确定原因是DC4.1上的php-oci8软件包对Tivoli有影响。”一样的问题,一样的解决,产品没有改,我的大脑记忆中也没有记住。
阅读(2567) | 评论(1) | 转发(0) |