全部博文(389)
分类: Oracle
2014-05-08 22:10:55
使用Strace跟踪oracle读取数据块
oracle是一个C语言程序,通过调用系统的io函数来实现io的过程,在linux下可以通过strace工具来
很方便查看进程对io函数的调用过程.
测试场景使用了rhel 5.8和oracle database 12C.
查看oracle对应的服务器进程号.
[root@o12c ~]# ps -eaf | grep beq
oracle 1611 1591 0 03:01 ? 00:00:00 oraclec12 (DESCRIPTION=(LOCAL=YES)(ADDRESS=(PROTOCOL=beq)))
root 1683 1650 0 03:02 pts/5 00:00:00 grep beq
使用strace 对进程进行跟踪
[root@o12c ~]# strace -p 1611 -o test1.out
...................
mmap(NULL, 1048576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_NORESERVE, 7, 0x1ed000) = 0x2b5d0629f000
mmap(0x2b5d0629f000, 589824, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 7, 0) = 0x2b5d0629f000
open("/u01/app/oracle/oradata/c12/users01.dbf", O_RDWR|O_SYNC) = 12
getrlimit(RLIMIT_NOFILE, {rlim_cur=64*1024, rlim_max=64*1024}) = 0
fcntl(12, F_DUPFD, 256) = 257
close(12) = 0
fcntl(257, F_SETFD, FD_CLOEXEC) = 0
pread(257, "\6\242\0\0\263\0\200\1\344\215\35\0\0\0\1\4v\244\0\0\1\0\0\0qg\1\0\335\215\35\0"..., 106496, 1466368) = 106496
times(NULL) = 431210068
mmap(NULL, 2097152, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_NORESERVE, 7, 0x2ed000) = 0x2b5d0639f000
mmap(0x2b5d0639f000, 589824, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 7, 0) = 0x2b5d0639f000
pread(257, "\6\242\0\0\301\0\200\1\347\215\35\0\0\0\2\4!H\0\0\1\0\0\0qg\1\0\335\215\35\0"..., 122880, 1581056) = 122880
times(NULL) = 431210068
pread(257, "\6\242\0\0\321\0\200\1\351\215\35\0\0\0\2\4Di\0\0\1\0\0\0qg\1\0\335\215\35\0"..., 122880, 1712128) = 122880
times(NULL) = 431210068
pread(257, "\6\242\0\0\341\0\200\1\353\215\35\0\0\0\2\4\276\361\0\0\1\0\0\0qg\1\0\335\215\35\0"..., 122880, 1843200) = 122880
times(NULL) = 431210068
pread(257, "\6\242\0\0\361\0\200\1\355\215\35\0\0\0\2\4\245\2\0\0\1\0\0\0qg\1\0\335\215\35\0"..., 122880, 1974272) = 122880
times(NULL) = 431210068
pread(257, "\6\242\0\0\1\1\200\1\357\215\35\0\0\0\2\4%\214\0\0\1\0\0\0qg\1\0\335\215\35\0"..., 122880, 2105344) = 122880
times(NULL) = 431210068
pread(257, "\6\242\0\0\21\1\200\1\361\215\35\0\0\0\2\4\243\257\0\0\1\0\0\0qg\1\0\335\215\35\0"..., 122880, 2236416) = 122880
times(NULL) = 431210068
pread(257, "\6\242\0\0!\1\200\1\363\215\35\0\0\0\2\4zY\0\0\1\0\0\0qg\1\0\335\215\35\0"..., 122880, 2367488) = 122880
首先调用open函数,读取表空间的数据文件返回句柄12,然后使用fcntl进行文件句柄复制和其他操作,原来的文件句柄12被关闭.接下来使用pread函数来读取文件.附带也证明了一点就是oracle服务器进程负责去读取数据文件,而写数据文件是通过后台进程dbwr来完成的.
接下来我们打开sql trace,通过sql trace文件和strace文件对应,过程更加直观.
[root@o12c ~]# ps -eaf | grep beq
oracle 1916 1915 0 03:16 ? 00:00:00 oraclec12 (DESCRIPTION=(LOCAL=YES)(ADDRESS=(PROTOCOL=beq)))
SQL> alter session set events '10046 trace name context forever,level 8';
Session altered.
[root@o12c ~]# strace -p 1916 -e write=all -e all -o test2.out
open("/u01/app/oracle/oradata/c12/users01.dbf", O_RDWR|O_SYNC) = 15
getrlimit(RLIMIT_NOFILE, {rlim_cur=64*1024, rlim_max=64*1024}) = 0
fcntl(15, F_DUPFD, 256) = 257
close(15) = 0
fcntl(257, F_SETFD, FD_CLOEXEC) = 0
write(12, "WAIT #47487286861640: nam='Disk "..., 129) = 129
| 00000 57 41 49 54 20 23 34 37 34 38 37 32 38 36 38 36 WAIT #47 48728686 |
| 00010 31 36 34 30 3a 20 6e 61 6d 3d 27 44 69 73 6b 20 1640: na m='Disk |
| 00020 66 69 6c 65 20 6f 70 65 72 61 74 69 6f 6e 73 20 file ope rations |
| 00030 49 2f 4f 27 20 65 6c 61 3d 20 31 36 30 20 46 69 I/O' ela = 160 Fi |
| 00040 6c 65 4f 70 65 72 61 74 69 6f 6e 3d 32 20 66 69 leOperat ion=2 fi |
| 00050 6c 65 6e 6f 3d 36 20 66 69 6c 65 74 79 70 65 3d leno=6 f iletype= |
| 00060 32 20 6f 62 6a 23 3d 39 32 30 31 37 20 74 69 6d 2 obj#=9 2017 tim |
| 00070 3d 31 33 39 36 31 32 30 36 33 33 36 35 37 35 33 =1396120 63365753 |
| 00080 31 1 |
write(12, "\n", 1) = 1
| 00000 0a . |
pread(257, "#\242\0\0\262\0\200\1F\216\35\0\0\0\1\4\346\312\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192, 1458176) = 8192
write(12, "WAIT #47487286861640: nam='db fi"..., 120) = 120
| 00000 57 41 49 54 20 23 34 37 34 38 37 32 38 36 38 36 WAIT #47 48728686 |
| 00010 31 36 34 30 3a 20 6e 61 6d 3d 27 64 62 20 66 69 1640: na m='db fi |
| 00020 6c 65 20 73 65 71 75 65 6e 74 69 61 6c 20 72 65 le seque ntial re |
| 00030 61 64 27 20 65 6c 61 3d 20 38 37 36 20 66 69 6c ad' ela= 876 fil |
| 00040 65 23 3d 36 20 62 6c 6f 63 6b 23 3d 31 37 38 20 e#=6 blo ck#=178 |
| 00050 62 6c 6f 63 6b 73 3d 31 20 6f 62 6a 23 3d 39 32 blocks=1 obj#=92 |
| 00060 30 31 37 20 74 69 6d 3d 31 33 39 36 31 32 30 36 017 tim= 13961206 |
| 00070 33 33 36 35 38 35 34 34 33658544 |
write(12, "\n", 1) = 1
| 00000 0a . |
mmap(0x2b307eeeb000, 131072, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 7, 0) = 0x2b307eeeb000
pread(257, "\6\242\0\0\263\0\200\1\344\215\35\0\0\0\1\4v\244\0\0\1\0\0\0qg\1\0\335\215\35\0"..., 106496, 1466368) = 106496
times(NULL) = 431298669
times(NULL) = 431298669
write(12, "WAIT #47487286861640: nam='SQL*N"..., 124) = 124
| 00000 57 41 49 54 20 23 34 37 34 38 37 32 38 36 38 36 WAIT #47 48728686 |
| 00010 31 36 34 30 3a 20 6e 61 6d 3d 27 53 51 4c 2a 4e 1640: na m='SQL*N |
| 00020 65 74 20 6d 65 73 73 61 67 65 20 74 6f 20 63 6c et messa ge to cl |
| 00030 69 65 6e 74 27 20 65 6c 61 3d 20 33 20 64 72 69 ient' el a= 3 dri |
| 00040 76 65 72 20 69 64 3d 31 36 35 30 38 31 35 32 33 ver id=1 65081523 |
| 00050 32 20 23 62 79 74 65 73 3d 31 20 70 33 3d 30 20 2 #bytes =1 p3=0 |
| 00060 6f 62 6a 23 3d 2d 31 20 74 69 6d 3d 31 33 39 36 obj#=-1 tim=1396 |
| 00070 31 32 30 36 33 33 36 35 36 38 37 38 12063365 6878 |
write(12, "\n", 1) = 1
| 00000 0a . |
getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 54991}, ru_stime={0, 37994}, ...}) = 0
open("/u01/app/oracle/oradata/c12/users01.dbf", O_RDWR|O_SYNC) = 15
getrlimit(RLIMIT_NOFILE, {rlim_cur=64*1024, rlim_max=64*1024}) = 0
fcntl(15, F_DUPFD, 256) = 257
close(15) = 0
fcntl(257, F_SETFD, FD_CLOEXEC) = 0
write(12, "WAIT #47487286861640: nam='Disk "..., 129) = 129
| 00000 57 41 49 54 20 23 34 37 34 38 37 32 38 36 38 36 WAIT #47 48728686 |
| 00010 31 36 34 30 3a 20 6e 61 6d 3d 27 44 69 73 6b 20 1640: na m='Disk |
| 00020 66 69 6c 65 20 6f 70 65 72 61 74 69 6f 6e 73 20 file ope rations |
| 00030 49 2f 4f 27 20 65 6c 61 3d 20 31 36 30 20 46 69 I/O' ela = 160 Fi |
| 00040 6c 65 4f 70 65 72 61 74 69 6f 6e 3d 32 20 66 69 leOperat ion=2 fi |
| 00050 6c 65 6e 6f 3d 36 20 66 69 6c 65 74 79 70 65 3d leno=6 f iletype= |
| 00060 32 20 6f 62 6a 23 3d 39 32 30 31 37 20 74 69 6d 2 obj#=9 2017 tim |
| 00070 3d 31 33 39 36 31 32 30 36 33 33 36 35 37 35 33 =1396120 63365753 |
| 00080 31 1 |
write(12, "\n", 1) = 1
| 00000 0a . |
pread(257, "#\242\0\0\262\0\200\1F\216\35\0\0\0\1\4\346\312\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192, 1458176) = 8192
write(12, "WAIT #47487286861640: nam='db fi"..., 120) = 120
| 00000 57 41 49 54 20 23 34 37 34 38 37 32 38 36 38 36 WAIT #47 48728686 |
| 00010 31 36 34 30 3a 20 6e 61 6d 3d 27 64 62 20 66 69 1640: na m='db fi |
| 00020 6c 65 20 73 65 71 75 65 6e 74 69 61 6c 20 72 65 le seque ntial re |
| 00030 61 64 27 20 65 6c 61 3d 20 38 37 36 20 66 69 6c ad' ela= 876 fil |
| 00040 65 23 3d 36 20 62 6c 6f 63 6b 23 3d 31 37 38 20 e#=6 blo ck#=178 |
| 00050 62 6c 6f 63 6b 73 3d 31 20 6f 62 6a 23 3d 39 32 blocks=1 obj#=92 |
| 00060 30 31 37 20 74 69 6d 3d 31 33 39 36 31 32 30 36 017 tim= 13961206 |
| 00070 33 33 36 35 38 35 34 34 33658544 |
write(12, "\n", 1) = 1
| 00000 0a . |
mmap(0x2b307eeeb000, 131072, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 7, 0) = 0x2b307eeeb000
pread(257, "\6\242\0\0\263\0\200\1\344\215\35\0\0\0\1\4v\244\0\0\1\0\0\0qg\1\0\335\215\35\0"..., 106496, 1466368) = 106496
times(NULL) = 431298669
write(12, "WAIT #47487286861640: nam='direc"..., 126) = 126
| 00000 57 41 49 54 20 23 34 37 34 38 37 32 38 36 38 36 WAIT #47 48728686 |
| 00010 31 36 34 30 3a 20 6e 61 6d 3d 27 64 69 72 65 63 1640: na m='direc |
| 00020 74 20 70 61 74 68 20 72 65 61 64 27 20 65 6c 61 t path r ead' ela |
| 00030 3d 20 32 30 33 20 66 69 6c 65 20 6e 75 6d 62 65 = 203 fi le numbe |
| 00040 72 3d 36 20 66 69 72 73 74 20 64 62 61 3d 31 37 r=6 firs t dba=17 |
| 00050 39 20 62 6c 6f 63 6b 20 63 6e 74 3d 31 33 20 6f 9 block cnt=13 o |
| 00060 62 6a 23 3d 39 32 30 31 37 20 74 69 6d 3d 31 33 bj#=9201 7 tim=13 |
| 00070 39 36 31 32 30 36 33 33 36 36 31 31 34 31 96120633 661141 |
write(12, "\n", 1) = 1
| 00000 0a . |
mmap(NULL, 1048576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_NORESERVE, 7, 0x1e9000) = 0x2b307ef7b000
mmap(0x2b307ef7b000, 589824, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 7, 0) = 0x2b307ef7b000
pread(257, "\6\242\0\0\301\0\200\1\347\215\35\0\0\0\2\4!H\0\0\1\0\0\0qg\1\0\335\215\35\0"..., 122880, 1581056) = 122880
times(NULL) = 431298669
打开trace文件来看对应的direct path read 事件
select count(*) from c##frank.t1
END OF STMT
PARSE #47487286861640:c=68989,e=250226,p=20,cr=361,cu=0,mis=1,r=0,dep=0,og=1,plh=3724264953,tim=1396120633656121
EXEC #47487286861640:c=0,e=66,p=0,cr=0,cu=0,mis=0,r=0,dep=0,og=1,plh=3724264953,tim=1396120633656660
WAIT #47487286861640: nam='SQL*Net message to client' ela= 3 driver id=1650815232 #bytes=1 p3=0 obj#=-1 tim=1396120633656878
WAIT #47487286861640: nam='Disk file operations I/O' ela= 160 FileOperation=2 fileno=6 filetype=2 obj#=92017 tim=1396120633657531
WAIT #47487286861640: nam='db file sequential read' ela= 876 file#=6 block#=178 blocks=1 obj#=92017 tim=1396120633658544
WAIT #47487286861640: nam='direct path read' ela= 203 file number=6 first dba=179 block cnt=13 obj#=92017 tim=1396120633661141
WAIT #47487286861640: nam='direct path read' ela= 275 file number=6 first dba=193 block cnt=15 obj#=92017 tim=1396120633661688
WAIT #47487286861640: nam='direct path read' ela= 149 file number=6 first dba=209 block cnt=15 obj#=92017 tim=1396120633662062
WAIT #47487286861640: nam='direct path read' ela= 170 file number=6 first dba=225 block cnt=15 obj#=92017 tim=1396120633662610
WAIT #47487286861640: nam='direct path read' ela= 141 file number=6 first dba=241 block cnt=15 obj#=92017 tim=1396120633662915
WAIT #47487286861640: nam='direct path read' ela= 137 file number=6 first dba=257 block cnt=15 obj#=92017 tim=1396120633663278
第一个direct path read事件,对应之前的一个io函数,只有在io函数执行完成,才能输出时间,所以sql trace的等待事件都是在事件发生完成后才会输出
mmap(0x2b307eeeb000, 131072, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 7, 0) = 0x2b307eeeb000
pread(257, "\6\242\0\0\263\0\200\1\344\215\35\0\0\0\1\4v\244\0\0\1\0\0\0qg\1\0\335\215\35\0"..., 106496, 1466368) = 106496
times(NULL) = 431298669
WAIT #47487286861640: nam='direct path read' ela= 203 file number=6 first dba=179 block cnt=13 obj#=92017 tim=1396120633661141
可以看出是读取了13个块,每个块是8KB大小. 13*8*1024=106496 个字节,刚好和前一个函数返回的字节数对应。