分类: Oracle
2016-01-24 10:20:52
--1.构造千万级数据,并插入一些重复数据,共有42条重复,保留一份,则保留21条即可 dingjun123@ORADB> create table t as select * from dba_objects; Table created. Elapsed: 00:00:00.29 dingjun123@ORADB> insert into t select * from (select * from t order by dbms_random.value) where rownum<10; 9 rows created. Elapsed: 00:00:00.17 dingjun123@ORADB> insert into t select * from (select * from t order by dbms_random.value) where rownum<3; 2 rows created. Elapsed: 00:00:00.11 dingjun123@ORADB> / 2 rows created. Elapsed: 00:00:00.12 dingjun123@ORADB> / 2 rows created. Elapsed: 00:00:00.11 dingjun123@ORADB> / 2 rows created. Elapsed: 00:00:00.11 dingjun123@ORADB> / 2 rows created. Elapsed: 00:00:00.12 dingjun123@ORADB> / 2 rows created. Elapsed: 00:00:00.11 dingjun123@ORADB> commit; Commit complete. dingjun123@ORADB> insert into t select owner,object_name,subobject_name,(select max(object_id) from t)+rownum,data_object_id,object_type,created,LAST_DDL_TIME,TIMESTAMP,STATUS, 2 TEMPORARY,GENERATED,SECONDARY,NAMESPACE,EDITION_NAME from t; 75313 rows created. Elapsed: 00:00:00.15 dingjun123@ORADB> / 150626 rows created. Elapsed: 00:00:00.27 dingjun123@ORADB> / 301252 rows created. Elapsed: 00:00:00.59 dingjun123@ORADB> / 602504 rows created. Elapsed: 00:00:03.50 dingjun123@ORADB> / 1205008 rows created. Elapsed: 00:00:09.65 dingjun123@ORADB> commit; Commit complete. Elapsed: 00:00:00.00 dingjun123@ORADB> insert into t select owner,object_name,subobject_name,(select max(object_id) from t)+rownum,data_object_id,object_type,created,LAST_DDL_TIME,TIMESTAMP,STATUS, 2 TEMPORARY,GENERATED,SECONDARY,NAMESPACE,EDITION_NAME from t; 2410016 rows created. Elapsed: 00:00:14.41 dingjun123@ORADB> / 4820032 rows created. Elapsed: 00:00:41.13 dingjun123@ORADB> commit; Commit complete. Elapsed: 00:00:00.00 --2.关键,建立exceptions表,然后建立唯一约束,有重复的rowid信息自动放入exceptions表里,也就是42条重复记录的row_id会放到exceptions 表里 dingjun123@ORADB> create table exceptions(row_id rowid, 2 owner varchar2(30), 3 table_name varchar2(30), 4 constraint varchar2(30)); Table created. Elapsed: 00:00:00.01 --可以加nologging,parallel提高效率,查找重复记录非常快 dingjun123@ORADB> alter table t add constraint uk_t unique(object_id,object_name,owner) 2 exceptions into EXCEPTIONS NOLOGGING PARALLEL 4 ; alter table t add constraint uk_t unique(object_id,object_name,owner) * ERROR at line 1: ORA-02299: cannot validate (DINGJUN123.UK_T) - duplicate keys found Elapsed: 00:00:53.53 dingjun123@ORADB> select count(*) from t; COUNT(*) ---------- 9640064 1 row selected. Elapsed: 00:00:01.56 --获得重复记录 dingjun123@ORADB> select count(*) from exceptions; COUNT(*) ---------- 42 1 row selected. Elapsed: 00:00:00.01 --3.插入重复记录到临时表dups中 dingjun123@ORADB> create table dups 2 as 3 select * 4 FROM t 5 where rowid in ( select row_id from exceptions ); Table created. Elapsed: 00:00:00.07 --4.删除所有重复记录 dingjun123@ORADB> delete from t where rowid in ( select row_id from 2 exceptions ); 42 rows deleted. Elapsed: 00:00:00.02 --5.重复记录保留一份 dingjun123@ORADB> insert into t SELECT owner,object_name,subobject_name,(select max(object_id) from t)+rownum,data_object_id,object_type,created,LAST_DDL_TIME,TIMESTAMP,STATUS, TEMPORARY,GENERATED,SECONDARY,NAMESPACE,EDITION_NAME FROM ( select owner,object_name,subobject_name,object_id,data_object_id,object_type,created,LAST_DDL_TIME,TIMESTAMP,STATUS, TEMPORARY,GENERATED,SECONDARY,NAMESPACE,EDITION_NAME, row_number() OVER(PARTITION BY object_id,object_name,owner ORDER BY ROWID) rn from dups )WHERE rn=1; 21 rows created. Elapsed: 00:00:00.01 dingjun123@ORADB> commit; Commit complete. Elapsed: 00:00:00.01 |