脚踏实地、勇往直前!
全部博文(1005)
分类: Oracle
2017-07-03 19:56:45
环境:
OS:CentOS 7
DB:11.2.0.4
1.查看当前的asm磁盘
[root@host01 bin]# oracleasm listdisks
DATA1
DATA2
DATA3
DATA4
DATA5
OCR1
OCR2
OCR3
REDO1
REDO2
2.查看ocr磁盘组
[root@host01 bin]# ./crsctl query css votedisk
## STATE File Universal Id File Name Disk group
-- ----- ----------------- --------- ---------
1. ONLINE 94fb7b495e204fdabf9f5ceec8d6f86e (ORCL:OCR1) [OCR]
2. ONLINE 3386c068bd524f6bbf3c15d383913ca6 (ORCL:OCR2) [OCR]
3. ONLINE 87d54edfeaa44f48bfa789d3b083f856 (ORCL:OCR3) [OCR]
Located 3 voting disk(s).
3.准备破坏OCR1磁盘
先找到对应的物理磁盘
[root@host01 bin]# oracleasm querydisk -d OCR1
Disk "OCR1" is a valid ASM disk on device [8,17]
[root@host01 dev]# ls -al sd*
brw-rw---- 1 root disk 8, 0 Jul 3 08:56 sda
brw-rw---- 1 root disk 8, 1 Jul 3 08:56 sda1
brw-rw---- 1 root disk 8, 2 Jul 3 08:56 sda2
brw-rw---- 1 root disk 8, 16 Jul 3 08:56 sdb
brw-rw---- 1 root disk 8, 17 Jul 3 08:56 sdb1
brw-rw---- 1 root disk 8, 32 Jul 3 08:56 sdc
brw-rw---- 1 root disk 8, 33 Jul 3 08:56 sdc1
brw-rw---- 1 root disk 8, 48 Jul 3 08:56 sdd
brw-rw---- 1 root disk 8, 49 Jul 3 08:56 sdd1
brw-rw---- 1 root disk 8, 64 Jul 3 08:56 sde
brw-rw---- 1 root disk 8, 65 Jul 3 08:56 sde1
brw-rw---- 1 root disk 8, 80 Jul 3 08:56 sdf
brw-rw---- 1 root disk 8, 81 Jul 3 08:56 sdf1
brw-rw---- 1 root disk 8, 96 Jul 3 08:56 sdg
brw-rw---- 1 root disk 8, 97 Jul 3 08:56 sdg1
brw-rw---- 1 root disk 8, 112 Jul 3 08:56 sdh
brw-rw---- 1 root disk 8, 113 Jul 3 08:56 sdh1
brw-rw---- 1 root disk 8, 128 Jul 3 08:56 sdi
brw-rw---- 1 root disk 8, 129 Jul 3 08:56 sdi1
brw-rw---- 1 root disk 8, 144 Jul 3 08:56 sdj
brw-rw---- 1 root disk 8, 145 Jul 3 08:56 sdj1
brw-rw---- 1 root disk 8, 160 Jul 3 08:56 sdk
brw-rw---- 1 root disk 8, 161 Jul 3 08:56 sdk1
4.模拟整个磁盘彻底损坏
[root@host01 dev]# dd if=/dev/zero of=/dev/sdb
dd: writing to ‘/dev/sdb’: No space left on device
2097153+0 records in
2097152+0 records out
1073741824 bytes (1.1 GB) copied, 44.5948 s, 24.1 MB/s
5.重启机器后检查crs
[root@host02 bin]# ./crsctl check crs
CRS-4638: Oracle High Availability Services is online
CRS-4535: Cannot communicate with Cluster Ready Services
CRS-4529: Cluster Synchronization Services is online
CRS-4534: Cannot communicate with Event Manager
[root@host01 bin]# ./crsctl check crs
CRS-4638: Oracle High Availability Services is online
CRS-4535: Cannot communicate with Cluster Ready Services
CRS-4529: Cluster Synchronization Services is online
CRS-4534: Cannot communicate with Event Manager
6.检查asm磁盘
[root@host02 bin]# oracleasm listdisks
DATA1
DATA2
DATA3
DATA4
DATA5
OCR2
OCR3
REDO1
REDO2
发现少了OCR1,我们先把它加上
[root@host01 ~]# /etc/init.d/oracleasm createdisk OCR1 /dev/sdb1
Marking disk "OCR1" as an ASM disk: [FAILED]
发现失败,查看日志
[root@host01 ~]# more /var/log/oracleasm
最后几行
Unable to query file "/dev/sdb1": No such file or directory
Unable to query file "/dev/sdb1": No such file or directory
Unable to query file "/dev/sdb1": No such file or directory
[root@host01 ~]# fdisk -l /dev/sdb
Disk /dev/sdb: 1073 MB, 1073741824 bytes, 2097152 sectors
Units = sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 512 bytes / 512 bytes
发现没有sdb1了,分区没有了,重新分区 fdisk /dev/sdb
再次创建磁盘
[root@host01 ~]# /etc/init.d/oracleasm createdisk OCR1 /dev/sdb1
Marking disk "OCR1" as an ASM disk: [ OK ]
另外一个节点重新搜索asm磁盘
[root@host02 ~]# oracleasm scandisks
Reloading disk partitions: done
Cleaning any stale ASM disks...
Scanning system for ASM disks...
Instantiating disk "OCR1"
6.停止所有节点的CRS服务
[root@host01 bin]# /etc/init.d/ohasd stop
[root@host02 bin]# /etc/init.d/ohasd stop
7.在一个节点集群启动到独占模式
[root@host02 bin]# ./crsctl start crs -excl -nocrs
[root@host02 bin]# ps -ef|grep d.bin
[root@host02 bin]# ps -ef|grep ASM
8.创建用于OCR和VoteDisk的ASM磁盘组,磁盘组名称跟之前的保持一致,叫做OCR
[root@host02 bin]# su - grid
Last login: Mon Jul 3 09:25:49 CST 2017 on pts/1
[grid@host02 ~]$ sqlplus /nolog
SQL*Plus: Release 11.2.0.4.0 Production on Mon Jul 3 17:54:29 2017
Copyright (c) 1982, 2013, Oracle. All rights reserved.
SQL> connect / as sysdba
Connected.
SQL> create diskgroup OCR normal redundancy disk 'ORCL:OCR1','ORCL:OCR2','ORCL:OCR3' ATTRIBUTE 'compatible.rdbms' = '11.2', 'compatible.asm' = '11.2';
create diskgroup OCR normal redundancy disk 'ORCL:OCR1','ORCL:OCR2','ORCL:OCR3'
*
ERROR at line 1:
ORA-15018: diskgroup cannot be created
ORA-15033: disk 'ORCL:OCR3' belongs to diskgroup "OCR"
ORA-15033: disk 'ORCL:OCR2' belongs to diskgroup "OCR"
清除磁盘头信息
dd if=/dev/zero of=/dev/sdc bs=1024 count=1000
dd if=/dev/zero of=/dev/sdd bs=1024 count=1000
然后重新分区
fdisk /dev/sdc
fdisk /dev/sdd
删除磁盘
/etc/init.d/oracleasm deletedisk OCR2
/etc/init.d/oracleasm deletedisk OCR3
重新创建磁盘(创建完成后,需要重新执行前面的create diskgroup语句来创建OCR磁盘组)
/etc/init.d/oracleasm createdisk OCR2 /dev/sdc1
/etc/init.d/oracleasm createdisk OCR3 /dev/sdd1
9.查看ocr备份
查看到备份在节点2
[root@host02 bin]# ./ocrconfig -showbackup
PROT-26: Oracle Cluster Registry backup locations were retrieved from a local copy
host02 2017/07/03 13:38:37 /u01/oracle/app/crs_home/cdata/rac-cluster/backup00.ocr
host02 2017/07/03 13:38:37 /u01/oracle/app/crs_home/cdata/rac-cluster/day.ocr
host02 2017/07/03 13:38:37 /u01/oracle/app/crs_home/cdata/rac-cluster/week.ocr
PROT-25: Manual backups for the Oracle Cluster Registry are not available
10.从最近的备份来恢复OCR
在节点2上做恢复
[root@host02 bin]#./ocrconfig -restore /u01/oracle/app/crs_home/cdata/rac-cluster/backup00.ocr
11.重建Votedisk磁盘
[root@host02 bin]# ./crsctl replace votedisk +OCR
Successful addition of voting disk 1eab53162d414fa1bf3414621c2b4ecc.
Successful addition of voting disk 3b1d2733071f4f45bff2c13f65fbbf1d.
Successful addition of voting disk 8b5073d81f564f15bf85d006b623619c.
Successfully replaced voting disk group with +OCR.
CRS-4266: Voting file(s) successfully replaced
12.重启CRS到正常模式
[root@host02 bin]#./crsctl stop crs
[root@host02 bin]#./crsctl start crs
[root@host01 bin]# ./crsctl check crs
CRS-4638: Oracle High Availability Services is online
CRS-4537: Cluster Ready Services is online
CRS-4529: Cluster Synchronization Services is online
CRS-4533: Event Manager is online
[root@host02 bin]# ./crsctl check crs
CRS-4638: Oracle High Availability Services is online
CRS-4537: Cluster Ready Services is online
CRS-4529: Cluster Synchronization Services is online
CRS-4533: Event Manager is online
13.重新创建spfile
在其中一个节点上创建spfile,我这里是在节点2创建
SQL> create spfile='+OCR' from pfile='/tmp/pfile_asm2.ora';
File created.
pfile_asm2.ora文件内容如下:
[root@host02 ~]# more /tmp/pfile_asm2.ora
*.asm_diskstring='/dev/asm*'
*.asm_power_limit=1
*.instance_type='asm'
*.large_pool_size=12M
*.remote_login_passwordfile='EXCLUSIVE'
重启CRS
[root@host02 bin]# ./crsctl stop crs
[root@host02 bin]# ./crsctl start crs
-- The End --