Chinaunix首页 | 论坛 | 博客
  • 博客访问: 2769132
  • 博文数量: 389
  • 博客积分: 4177
  • 博客等级: 上校
  • 技术积分: 4773
  • 用 户 组: 普通用户
  • 注册时间: 2008-11-16 23:29
文章分类

全部博文(389)

分类: Oracle

2013-12-04 22:25:51

                           观察CBO收集统计信息的过程
 

 dbms_stats在收集统计表的统计信息的时候,究竟是怎么计算的。可以通过10046事件来观察这个过程

以sys用户给frank用户创建一个表

SQL>  create table frank.t1 pctfree 70 pctused 30 as select * from dba_tables;
Table created
设置较大的pctfree ,模拟占用很大空间.

以frank用户,收集统计
SQL> exec dbms_stats.gather_table_stats(ownname=>'FRANK',tabname=>'T1');

PL/SQL procedure successfully completed.

从10046事件的跟踪文件来看,


SELECT SPARE4 FROM SYS.OPTSTAT_HIST_CONTROL$ WHERE SNAME = :B1

执行了很多这样的语句,这个语句主要定义的一些参数的设置,如degree,autostats_target,incremental等
这个主要是全局级别,由set_global_prefs过程来设置.像degug,trace和APPROXIMATE_NDV这种参数oracle没有
公开.

 

SELECT P.VALCHAR FROM SYS.OPTSTAT_USER_PREFS$ P, OBJ$ O, USER$ U WHERE P.OBJ#=O.OBJ#
AND U.USER#=O.OWNER# AND U.NAME=:B3 AND O.NAME=:B2 AND P.PNAME=:B1

这个表定义了对像级别一些设置,如stale_precent,degress,可以通过set_table_prefs过程来设置
dba_tab_stat_prefs这个视图底层的表就是查询该表,以得到对像级别的设置


SELECT /*+ ordered index(u) index(o) index(po) */ MOD(PO.SPARE2, 256), PO.PARTTYPE FROM SYS.USER$ U, SYS.OBJ$ O, SYS.PARTOBJ$ PO
WHERE U.NAME = :B3 AND O.NAME = :B2 AND O.NAMESPACE = :B1 AND U.USER# = O.OWNER# AND O.OBJ# = PO.OBJ#
查询用户表是不是分区表和分区类型.


select /*+ rule */ bucket_cnt, row_cnt, cache_cnt, null_cnt, timestamp#, s
ample_size, minimum, maximum, distcnt, lowval, hival, density, col#, spare1, spare2, avgcln
from hist_head$ where obj#=:1 and intcol#=:2
找出原表的栏位统计信息,这种语句有很多条,具体取决于栏位数.

select /*+ rule */ bucket, endpoint, col#, epvalue from histgrm$ where obj#=:1 and intcol#=:2 and row#=:3 order by bucket

找出原表的直方图信息


SELECT /*+ first_rows(1) */ '"'||OI.NAME||'"' FROM SYS.USER$ U, SYS.OBJ$ OT, SYS.IND$ I, SYS.OBJ$ OI
WHERE U.NAME = :B2 AND OT.OWNER# = U.USER# AND OT.NAME = :B1 AND I.BO# = OT.OBJ# AND I.TYPE# = 4 AND I.OBJ# = OI.OBJ#

找出表的索引信息


select t.ts#,t.file#,t.block#,nvl(t.bobj#,0),nvl(t.tab#,0),t.intcols,nvl(t.clucols,0),t.audit$,t.flags,t.pctfree$,
t.pctused$,t.initrans,t.maxtrans,t.rowcnt,t.blkcnt,t.empcnt,t.avgspc,t.chncnt,t.avgrln,t.analyzetime,t.samplesize,
t.cols,t.property,nvl(t.degree,1),nvl(t.instances,1),t.avgspc_flb,t.flbcnt,t.kernelcols,nvl(t.trigflag, 0),nvl(t.spare1,0),nvl(t.spare2,0),
t.spare4,t.spare6,ts.cachedblk,ts.cachehit,ts.logicalread from tab$ t, tab_stats$ ts where t.obj#= :1 and t.obj# = ts.obj# (+)

找出表所在的segment的信息,如inittrace,pctfree

select i.obj#,i.ts#,i.file#,i.block#,i.intcols,i.type#,i.flags,i.property,i.pctfree$,i.initrans,i.maxtrans,i.blevel,
i.leafcnt,i.distkey,i.lblkkey,i.dblkkey,i.clufac,i.cols,i.analyzetime,i.samplesize,i.dataobj#,nvl(i.degree,1),nvl(i.instances,1),
i.rowcnt,mod(i.pctthres$,256),i.indmethod#,i.trunccnt,nvl(c.unicols,0),nvl(c.deferrable#+c.valid#,0),nvl(i.spare1,i.intcols),i.spare4,
i.spare2,i.spare6,decode(i.pctthres$,null,null,mod(trunc(i.pctthres$/256),256)),ist.cachedblk,ist.cachehit,ist.logicalread from ind$ i,
ind_stats$ ist, (select enabled, min(cols) unicols,min(to_number(bitand(defer,1))) deferrable#,min(to_number(bitand(defer,4))) valid#
 from cdef$ where obj#=:1 and enabled > 1 group by enabled) c where i.obj#=c.enabled(+) and i.obj# = ist.obj#(+) and i.bo#=:1 order by i.obj#

查找表的索引的segment的信息.

SELECT COUNT(UNQ) UNQ, COUNT(PFX) PFX FROM (SELECT /*+ first_rows(1) leading(cc) */ CD.TYPE# UNQ, NULL PFX
FROM SYS.CCOL$ CC, SYS.CDEF$ CD WHERE CC.OBJ# = :B2 AND CC.INTCOL# = :B1 AND CD.CON# = CC.CON# AND CD.OBJ# = CC.OBJ#
AND CD.ENABLED IS NOT NULL AND CD.INTCOLS = 1 AND CD.TYPE# IN (2,3) AND BITAND(CD.DEFER, 2+4) = 4 AND ROWNUM < 2
UNION ALL SELECT /*+ first_rows(1) leading(i) */ CASE WHEN I.INTCOLS = 1 AND BITAND(I.PROPERTY,1) = 1 THEN 3 ELSE NULL END UNQ,
 CASE WHEN IC.POS# = 1 THEN 1 ELSE NULL END PFX FROM SYS.IND$ I, SYS.ICOL$ IC WHERE I.BO# = :B2 AND I.BO# = IC.BO# AND IC.INTCOL# = :B1
AND I.OBJ# = IC.OBJ# AND BITAND(I.FLAGS,1025) = 0 AND ROWNUM < 2 )
又是很多类似关于栏位的语句,每个栏位对应一条类似的语句

select /*+ full(t) no_parallel(t) no_parallel_index(t) dbms_stats cursor_sharing_exact use_weak_name_resl dynamic_sampling(0) no_monitoring */
to_char(count(SYS_OP_COMBINED_HASH("OBJECT_NAME","OBJECT_ID"))),
to_char(substrb(dump(min(SYS_OP_COMBINED_HASH("OBJECT_NAME","OBJECT_ID")),16,0,32),1,120)),
to_char(substrb(dump(max(SYS_OP_COMBINED_HASH("OBJECT_NAME","OBJECT_ID")),16,0,32),1,120)),
to_char(count("OWNER")),
to_char(substrb(dump(min("OWNER"),16,0,32),1,120)),to_char(substrb(dump(max("OWNER"),16,0,32),1,120)),
to_char(count("OBJECT_NAME")),to_char(substrb(dump(min("OBJECT_NAME"),16,0,32),1,120)),
to_char(substrb(dump(max("OBJECT_NAME"),16,0,32),1,120)),to_char(count("SUBOBJECT_NAME")),
to_char(substrb(dump(min("SUBOBJECT_NAME"),16,0,32),1,120)),to_char(substrb(dump(max("SUBOBJECT_NAME"),16,0,32),1,120)),
to_char(count("OBJECT_ID")),to_char(substrb(dump(min("OBJECT_ID"),16,0,32),1,120)),
to_char(substrb(dump(max("OBJECT_ID"),16,0,32),1,120)),to_char(count("DATA_OBJECT_ID"))
,to_char(substrb(dump(min("DATA_OBJECT_ID"),16,0,32),1,120)),
to_char(substrb(dump(max("DATA_OBJECT_ID"),16,0,32),1,120)),
to_char(count("OBJECT_TYPE")),to_char(substrb(dump(min("OBJECT_TYPE"),16,0,32),1,120)),
to_char(substrb(dump(max("OBJECT_TYPE"),16,0,32),1,120)),
to_char(count("CREATED")),to_char(substrb(dump(min("CREATED"),16,0,32),1,120)),
to_char(substrb(dump(max("CREATED"),16,0,32),1,120)),to_char(count("LAST_DDL_TIME")),
to_char(substrb(dump(min("LAST_DDL_TIME"),16,0,32),1,120)),
to_char(substrb(dump(max("LAST_DDL_TIME"),16,0,32),1,120)),to_char(count("TIMESTAMP")),
to_char(substrb(dump(min("TIMESTAMP"),16,0,32),1,120)),to_char(substrb(dump(max("TIMESTAMP"),16,0,32),1,120)),
to_char(count("STATUS")),to_char(substrb(dump(min("STATUS"),16,0,32),1,120)),to_char(substrb(dump(max("STATUS"),16,0,32),1,120))
,to_char(count("TEMPORARY")),to_char(substrb(dump(min("TEMPORARY"),16,0,32),1,120)),to_char(substrb(dump(max("TEMPORARY"),16,0,32),1,120)),
to_char(count("GENERATED")),to_char(substrb(dump(min("GENERATED"),16,0,32),1,120)),to_char(substrb(dump(max("GENERATED"),16,0,32),1,120)),
to_char(count("SECONDARY")),to_char(substrb(dump(min("SECONDARY"),16,0,32),1,120)),to_char(substrb(dump(max("SECONDARY"),16,0,32),1,120)),to_char(count("NAMESPACE")),
to_char(substrb(dump(min("NAMESPACE"),16,0,32),1,120)),to_char(substrb(dump(max("NAMESPACE"),16,0,32),1,120)),to_char(count("EDITION_NAME")),to_char(substrb(dump(min("EDITION_NAME"),16,0,32),1,120)),
to_char(substrb(dump(max("EDITION_NAME"),16,0,32),1,120)) from "FRANK"."T1" t

计算该表的统计信息,如last_ddl_time,这个语句会使用全表扫描.产生很多db file scattered read事件.

select min(minbkt),maxbkt,substrb(dump(min(val),16,0,32),1,120) minval,substrb(dump(max(val),16,0,32),1,120) maxval,
sum(rep) sumrep, sum(repsq) sumrepsq, max(rep) maxrep, count(*) bktndv, sum(case when rep=1 then 1 else 0 end) unqrep
 from (select val,min(bkt) minbkt, max(bkt) maxbkt, count(val) rep, count(val)*count(val) repsq from
(select /*+ no_parallel(t) no_parallel_index(t) dbms_stats cursor_sharing_exact use_weak_name_resl dynamic_sampling(0) no_monitoring */"OBJECT_ID" val,
 ntile(254) over (order by "OBJECT_ID") bkt  from "FRANK"."T1" sample (   .1236902329) t  where "OBJECT_ID" is not null) group by val) g
roup by maxbkt order by maxbkt


这里才有体现sample_size,难道是我以前没有收集,所以上个语句使用100%? 这个语句产生direct path read和
db file sequential read而对就的obj#却是78587(frank.t1)
WAIT #10: nam='db file sequential read' ela= 5 file#=4 block#=30913 blocks=1 obj#=78587 tim=1386041814063073

sample (   .1236902329)使用的是自动计算的,好像是一个很复杂的过程,在<高性能SQL引擎>有介绍.


delete /*+ index(m) */ from sys.mon_mods$ m where obj# = :1

删除跟踪表的对应的obj#记录,这个表记录生个表的dml发生的次数.

update tab$ set ts#=:2,file#=:3,block#=:4,bobj#=decode(:5,0,null,:5),tab#=decode(:6,0,null,:6),intcols=:7,
kernelcols=:8,clucols=decode(:9,0,null,:9),audit$=:10,flags=:11,pctfree$=:12,pctused$=:13,initrans=:14,
maxtrans=:15,rowcnt=:16,blkcnt=:17,empcnt=:18,avgspc=:19,chncnt=:20,avgrln=:21,analyzetime=:22,samplesize=:23,
cols=:24,property=:25,degree=decode(:26,1,null,:26),instances=decode(:27,1,null,:27),dataobj#=:28,avgspc_flb=:29,
flbcnt=:30,trigflag=:31,spare1=:32,spare2=decode(:33,0,null,:33),spare4=:34,spare6=:35 where obj#=:1

更新tab$中相关信息,就是我们平常看到dba_tables中的rows,blocks,last_ddl等一些信息.


insert into sys.wri$_optstat_tab_history(obj#,rowcnt,blkcnt,avgrln, analyzetime,samplesize,cachedblk,cachehit,logicalread,savtime,flags)
 values  (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11)
insert into sys.wri$_optstat_histgrm_history (obj#,intcol#,savtime,bucket, endpoint,epvalue,colname)  select hg.obj#,hg.intcol#,:3,
hg.bucket,hg.endpoint,hg.epvalue, :4  from sys.histgrm$ hg  where hg.obj# = :1 and hg.intcol# = :2
insert into sys.wri$_optstat_histhead_history (obj#,intcol#,savtime,flags, null_cnt,minimum,maximum,distcnt,density,lowval,hival,
avgcln,sample_distcnt, sample_size,timestamp#,colname)  select h.obj#, h.intcol#, :3, bitand(h.spare2,7) + 8 + decode(h.cache_cnt,0,0,64),
 h.null_cnt, h.minimum, h.maximum, h.distcnt, h.density, h.lowval, h.hival, h.avgcln, h.spare1, h.sample_size, h.timestamp#, :4 
from sys.hist_head$ h where h.obj# = :1 and h.intcol# = :2
把原来的统计信息inert到历史表里.必要的时候可以恢复.表,直方图和栏位三者的历史统计信息是分开存放的.


update hist_head$
set bucket_cnt=:3, row_cnt=:4, cache_cnt=:5,null_cnt=:6, timestamp#=:7, sample_size=:8, minimum=:9,
maximum=:10,distcnt=:11, lowval=:12, hival=:13, density=:14, spare1=:15, spare2=:16, avgcln=:17, col#=:18 where obj#=:1 and intcol#=:2

对栏位的统计信息进行更新,每个栏位一行对应一条这样的语句


SELECT SU.NAME, SO.NAME, A.STATSTYPE#, C.INTCOL#
FROM ASSOCIATION$ A, OBJ$ O, USER$ U, COL$ C, OBJ$ SO, USER$ SU, COLTYPE$ CT, OBJ$ TY
WHERE O.OWNER#=U.USER# AND A.OBJ#=TY.OBJ# AND O.OBJ#=C.OBJ#
AND C.INTCOL#=CT.INTCOL# AND O.OBJ#=CT.OBJ# AND CT.TOID=TY.OID$
AND A.STATSTYPE#=SO.OBJ# AND SO.OWNER#=SU.USER# AND O.TYPE#=2 AND O.NAME='T1' AND U.NAME='FRANK' AND C.NAME='OBJECT_TYPE'

查找栏位的一些信息


oracle收集统计信息太复杂了.

阅读(5176) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~