分类: 服务器与存储
2008-03-24 18:48:08
做sfrac4.1到sfrac5.0的升级后,突然发现有3个GAB端口(o、v、w)未启动。下面是排错过程。
root@lxsfrac07 # gabconfig -a
===============================================================
Port a gen 390201 membership 01
Port b gen 390205 membership 01
Port d gen 390204 membership 01
Port f gen 390211 membership 01
Port h gen 39020a membership 01
root@lxsfrac07 # hastatus
attempting to connect....connected
group resource system message
--------------- -------------------- -------------------- --------------------
lxsfrac07 RUNNING
lxsfrac08 RUNNING
cvm lxsfrac07 OFFLINE
cvm lxsfrac08 OFFLINE
-------------------------------------------------------------------------
vxfsckd lxsfrac07 OFFLINE
vxfsckd lxsfrac08 OFFLINE
cvm_clus lxsfrac07 OFFLINE
cvm_clus lxsfrac08 OFFLINE
cvm_vxconfigd lxsfrac07 ONLINE
-------------------------------------------------------------------------
cvm_vxconfigd lxsfrac08 ONLINE
root@lxsfrac07 # tail -f engine_A.log
2008/02/20 13:14:08 VCS NOTICE V-16-1-10322 System lxsfrac08 (Node '1') changed state from UNKNOWN to INITING
2008/02/20 13:14:08 VCS NOTICE V-16-1-10322 System lxsfrac08 (Node '1') changed state from INITING to CURRENT_DISCOVER_WAIT
2008/02/20 13:14:08 VCS NOTICE V-16-1-10322 System lxsfrac08 (Node '1') changed state from CURRENT_DISCOVER_WAIT to REMOTE_BUILD
2008/02/20 13:14:08 VCS INFO V-16-1-10463 Sending snapshot to node: 1
2008/02/20 13:14:09 VCS NOTICE V-16-1-10322 System lxsfrac08 (Node '1') changed state from REMOTE_BUILD to RUNNING
2008/02/20 13:14:09 VCS ERROR V-16-10001-1005 (lxsfrac08) CVMCluster:???:monitor:node - state: out of cluster
2008/02/20 13:14:10 VCS INFO V-16-1-10297 Resource cvm_vxconfigd (Owner: unknown, Group: cvm) is online on lxsfrac08 (First probe)
2008/02/20 13:14:10 VCS INFO V-16-1-10304 Resource cvm_clus (Owner: unknown, Group: cvm) is offline on lxsfrac08 (First probe)
2008/02/20 13:14:10 VCS INFO V-16-1-10304 Resource vxfsckd (Owner: unknown, Group: cvm) is offline on lxsfrac08 (First probe)
2008/02/20 13:14:10 VCS NOTICE V-16-1-10438 Group cvm has been probed on system lxsfrac08
2008/02/20 13:18:01 VCS INFO V-16-1-50135 User root fired command: hares -online cvm_clus lxsfrac07 from localhost
2008/02/20 13:19:05 VCS ERROR V-16-10001-1005 (lxsfrac07) CVMCluster:???:monitor:node - state: out of cluster
2008/02/20 13:19:10 VCS ERROR V-16-10001-1005 (lxsfrac08) CVMCluster:???:monitor:node - state: out of cluster
2008/02/20 13:24:05 VCS ERROR V-16-10001-1005 (lxsfrac07) CVMCluster:???:monitor:node - state: out of cluster
2008/02/20 13:24:11 VCS ERROR V-16-10001-1005 (lxsfrac08) CVMCluster:???:monitor:node - state: out of cluster
root@lxsfrac07 # hares -online cvm_clus -sys lxsfrac07
VCS WARNING V-16-1-10276 Group cvm for resource cvm_clus is frozen
root@lxsfrac07 # diff -c main.cf_080214 main.cf
*** main.cf_080214 Thu Feb 14 15:13:06 2008
--- main.cf Wed Feb 20 11:05:19 2008
***************
*** 1,12 ****
include "types.cf"
include "CFSTypes.cf"
include "CVMTypes.cf"
- include "MultiPrivNIC.cf"
include "OracleTypes.cf"
- include "PrivNIC.cf"
cluster wylrac (
! UserNames = { admin = IhiAhcHeiDiiGqiChf }
Administrators = { admin }
HacliUserLevel = COMMANDROOT
)
--- 1,10 ----
include "types.cf"
include "CFSTypes.cf"
include "CVMTypes.cf"
include "OracleTypes.cf"
cluster wylrac (
! UserNames = { admin = ghiAhcHeiDiiGqiChf }
Administrators = { admin }
HacliUserLevel = COMMANDROOT
)
***************
*** 19,24 ****
--- 17,23 ----
group cvm (
SystemList = { lxsfrac07 = 0, lxsfrac08 = 1 }
+ Frozen = 1
AutoFailOver = 0
Parallel = 1
AutoStartList = { lxsfrac07, lxsfrac08 }
root@lxsfrac07 # more main.cf_080214
include "types.cf"
include "CFSTypes.cf"
include "CVMTypes.cf"
include "MultiPrivNIC.cf"
include "OracleTypes.cf"
include "PrivNIC.cf"
cluster wylrac (
UserNames = { admin = IhiAhcHeiDiiGqiChf }
Administrators = { admin }
HacliUserLevel = COMMANDROOT
)
system lxsfrac07 (
)
system lxsfrac08 (
)
group cvm (
SystemList = { lxsfrac07 = 0, lxsfrac08 = 1 }
AutoFailOver = 0
Parallel = 1
AutoStartList = { lxsfrac07, lxsfrac08 }
)
CFSfsckd vxfsckd (
)
CVMCluster cvm_clus (
CVMClustName = wylrac
CVMNodeId = { lxsfrac07 = 0, lxsfrac08 = 1 }
CVMTransport = gab
CVMTimeout = 200
)
CVMVxconfigd cvm_vxconfigd (
Critical = 0
CVMVxconfigdArgs = { syslog }
)
cvm_clus requires cvm_vxconfigd
vxfsckd requires cvm_clus
// resource dependency tree
//
// group cvm
// {
root@lxsfrac07 # more main.cf
include "types.cf"
include "CFSTypes.cf"
include "CVMTypes.cf"
include "OracleTypes.cf"
cluster wylrac (
UserNames = { admin = ghiAhcHeiDiiGqiChf }
Administrators = { admin }
HacliUserLevel = COMMANDROOT
)
system lxsfrac07 (
)
system lxsfrac08 (
)
group cvm (
SystemList = { lxsfrac07 = 0, lxsfrac08 = 1 }
Frozen = 1
AutoFailOver = 0
Parallel = 1
AutoStartList = { lxsfrac07, lxsfrac08 }
)
CFSfsckd vxfsckd (
)
CVMCluster cvm_clus (
CVMClustName = wylrac
CVMNodeId = { lxsfrac07 = 0, lxsfrac08 = 1 }
CVMTransport = gab
CVMTimeout = 200
)
CVMVxconfigd cvm_vxconfigd (
Critical = 0
CVMVxconfigdArgs = { syslog }
)
cvm_clus requires cvm_vxconfigd
vxfsckd requires cvm_clus
root@lxsfrac07 # hagrp -unfreeze cvm -persistent
VCS WARNING V-16-1-11309 Configuration must be ReadWrite
root@lxsfrac07 # haconf -makerw
root@lxsfrac07 # hagrp -unfreeze cvm -persistent
root@lxsfrac07 # haconf -dump -makero
root@lxsfrac07 # hastatus
attempting to connect....connected
group resource system message
--------------- -------------------- -------------------- --------------------
lxsfrac07 RUNNING
lxsfrac08 RUNNING
cvm lxsfrac07 OFFLINE
cvm lxsfrac08 OFFLINE
-------------------------------------------------------------------------
vxfsckd lxsfrac07 OFFLINE
vxfsckd lxsfrac08 OFFLINE
cvm_clus lxsfrac07 OFFLINE
cvm_clus lxsfrac08 OFFLINE
cvm_vxconfigd lxsfrac07 ONLINE
-------------------------------------------------------------------------
cvm_vxconfigd lxsfrac08 ONLINE
^C
root@lxsfrac07 # hagrp -online cvm -sys lxsfrac07
root@lxsfrac07 # hagrp -online cvm -sys lxsfrac08
root@lxsfrac07 # hastatus
attempting to connect....connected
group resource system message
--------------- -------------------- -------------------- --------------------
lxsfrac07 RUNNING
lxsfrac08 RUNNING
cvm lxsfrac07 ONLINE
cvm lxsfrac08 STARTING OFFLINE
-------------------------------------------------------------------------
vxfsckd lxsfrac07 ONLINE
vxfsckd lxsfrac08 OFFLINE
vxfsckd lxsfrac08 WAITING FOR CHILDREN ONLINE
cvm_clus lxsfrac07 ONLINE
cvm_clus lxsfrac08 OFFLINE
-------------------------------------------------------------------------
cvm_clus lxsfrac08 WAITING FOR ONLINE
cvm_vxconfigd lxsfrac07 ONLINE
cvm_vxconfigd lxsfrac08 ONLINE
cvm_clus lxsfrac08 ONLINE
cvm lxsfrac08 STARTING PARTIAL
-------------------------------------------------------------------------
vxfsckd lxsfrac08 WAITING FOR ONLINE
vxfsckd lxsfrac08 ONLINE
cvm lxsfrac08 ONLINE
^C
root@lxsfrac07 # hastatus
attempting to connect....connected
group resource system message
--------------- -------------------- -------------------- --------------------
lxsfrac07 RUNNING
lxsfrac08 RUNNING
cvm lxsfrac07 ONLINE
cvm lxsfrac08 ONLINE
-------------------------------------------------------------------------
vxfsckd lxsfrac07 ONLINE
vxfsckd lxsfrac08 ONLINE
cvm_clus lxsfrac07 ONLINE
cvm_clus lxsfrac08 ONLINE
cvm_vxconfigd lxsfrac07 ONLINE
-------------------------------------------------------------------------
cvm_vxconfigd lxsfrac08 ONLINE
root@lxsfrac07 # gabconfig -a
===============================================================
Port a gen 390201 membership 01
Port b gen 390205 membership 01
Port d gen 390204 membership 01
Port f gen 390211 membership 01
Port h gen 39020a membership 01
Port o gen 390203 membership 01
Port v gen 39020d membership 01
Port w gen 39020f membership 01
后记:
其实原因也很简单,sfrac从4.1升级到5.0后会把cvm group冻结,只需要解冻即可(可以用命令行,也可以直接修改main.cf文件)。但该排错过程涵盖了基本方法:
1) gabconfig -a
2) hastatus 及 hares -state
3) 检查vcs日志
一点小经验:o,v,w端口不能启动基本与vcs有关。
后来看过sfrac官方英文升级文档,里面写得很清楚,升级前都要备份之前的main.cf,升级完毕后用旧的main.cf来代替新的。汗自己一个,没有毅力看官方英文文档。以后要努力改正!