8. 使用load table语句装载数据
使用tpch_user登录IQ数据库,执行下面的load_tables.sql脚本。执行方法如下:
dbisql -c "uid=tpch_user;pwd=tpchpwd;eng=tpch_srv" -nogui load_tables.sql
load_tables.sql内容如下:
--load_tables.sql
BEGIN
declare startDate datetime;
declare endDate datetime;
set startDate=getDate();
LOAD TABLE part
(
p_partkey '|' ,
p_name '|' ,
p_mfgr '|' ,
p_brand '|' ,
p_type '|' ,
p_size '|' ,
p_container '|' ,
p_retailprice '|' ,
p_comment '|'
)
FROM '/sybiqdb/tpchdb/data/part.tbl'
FORMAT ASCII
STRIP ON
ESCAPES OFF
QUOTES OFF
NOTIFY 500000
ROW DELIMITED BY '\x0a'
WITH CHECKPOINT ON;
COMMIT;
set endDate=getDate();
message '[load part execute time is : ',datediff(ms,startDate,endDate),'ms]' type info to client;
END;
LOAD TABLE region
(
r_regionkey '|' ,
r_name '|' ,
r_comment '|'
)
FROM '/sybiqdb/tpchdb/data/region.tbl'
FORMAT ASCII
STRIP ON
ESCAPES OFF
QUOTES OFF
NOTIFY 500000
ROW DELIMITED BY '\x0a'
WITH CHECKPOINT ON;
COMMIT;
LOAD TABLE nation
(
n_nationkey '|' ,
n_name '|' ,
n_regionkey '|' ,
n_comment '|'
)
FROM '/sybiqdb/tpchdb/data/nation.tbl'
FORMAT ASCII
STRIP ON
ESCAPES OFF
QUOTES OFF
NOTIFY 500000
ROW DELIMITED BY '\x0a'
WITH CHECKPOINT ON;
COMMIT;
BEGIN
declare startDate datetime;
declare endDate datetime;
set startDate=getDate();
LOAD TABLE supplier
(
s_suppkey '|' ,
s_name '|' ,
s_address '|' ,
s_nationkey '|' ,
s_phone '|' ,
s_acctbal '|' ,
s_comment '|'
)
FROM '/sybiqdb/tpchdb/data/supplier.tbl'
FORMAT ASCII
STRIP ON
ESCAPES OFF
QUOTES OFF
NOTIFY 500000
ROW DELIMITED BY '\x0a'
WITH CHECKPOINT ON;
COMMIT;
set endDate=getDate();
message '[load supplier execute time is : ',datediff(ms,startDate,endDate),'ms]' type info to client;
END;
BEGIN
declare startDate datetime;
declare endDate datetime;
set startDate=getDate();
LOAD TABLE partsupp
(
ps_partkey '|' ,
ps_suppkey '|' ,
ps_availqty '|' ,
ps_supplycost '|' ,
ps_comment '|'
)
FROM '/sybiqdb/tpchdb/data/partsupp.tbl'
FORMAT ASCII
STRIP ON
ESCAPES OFF
QUOTES OFF
NOTIFY 500000
ROW DELIMITED BY '\x0a'
WITH CHECKPOINT ON;
COMMIT;
set endDate=getDate();
message '[load partsupp execute time is : ',datediff(ms,startDate,endDate),'ms]' type info to client;
END;
BEGIN
declare startDate datetime;
declare endDate datetime;
set startDate=getDate();
LOAD TABLE customer
(
c_custkey '|' ,
c_name '|' ,
c_address '|' ,
c_nationkey '|' ,
c_phone '|' ,
c_acctbal '|' ,
c_mktsegment '|' ,
c_comment '|'
)
FROM '/sybiqdb/tpchdb/data/customer.tbl'
FORMAT ASCII
STRIP ON
ESCAPES OFF
QUOTES OFF
NOTIFY 500000
ROW DELIMITED BY '\x0a'
WITH CHECKPOINT ON;
COMMIT;
set endDate=getDate();
message '[load customer execute time is : ',datediff(ms,startDate,endDate),'ms]' type info to client;
END;
BEGIN
declare startDate datetime;
declare endDate datetime;
set startDate=getDate();
LOAD TABLE orders
(
o_orderkey '|' ,
o_custkey '|' ,
o_orderstatus '|' ,
o_totalprice '|' ,
o_orderdate '|' ,
o_orderpriority '|' ,
o_clerk '|' ,
o_shippriority '|' ,
o_comment '|'
)
FROM '/sybiqdb/tpchdb/data/orders.tbl'
FORMAT ASCII
STRIP ON
ESCAPES OFF
QUOTES OFF
NOTIFY 500000
ROW DELIMITED BY '\x0a'
WITH CHECKPOINT ON;
COMMIT;
set endDate=getDate();
message '[load orders execute time is : ',datediff(ms,startDate,endDate),'ms]' type info to client;
END;
BEGIN
declare startDate datetime;
declare endDate datetime;
set startDate=getDate();
LOAD TABLE lineitem
(
l_orderkey '|' ,
l_partkey '|' ,
l_suppkey '|' ,
l_linenumber '|' ,
l_quantity '|' ,
l_extendedprice '|' ,
l_discount '|' ,
l_tax '|' ,
l_returnflag '|' ,
l_linestatus '|' ,
l_shipdate '|' ,
l_commitdate '|' ,
l_receiptdate '|' ,
l_shipinstruct '|' ,
l_shipmode '|' ,
l_comment '|'
)
FROM '/sybiqdb/tpchdb/data/lineitem.tbl.1','/sybiqdb/tpchdb/data/lineitem.tbl.2',
'/sybiqdb/tpchdb/data/lineitem.tbl.3','/sybiqdb/tpchdb/data/lineitem.tbl.4'
FORMAT ASCII
STRIP ON
ESCAPES OFF
QUOTES OFF
NOTIFY 500000
ROW DELIMITED BY '\x0a'
WITH CHECKPOINT ON;
COMMIT;
set endDate=getDate();
message '[load lineitem execute time is : ',datediff(ms,startDate,endDate),'ms]' type info to client;
END;
9. load执行情况说明
(1) 数据压缩情况
原始数据尺寸:1050M
IQ数据库中尺寸(TPCH_USER_MAIN dbspace):672M
IQ存放用户数据尺寸只有原始数据尺寸的64%
在同样的环境、表定义和数据下,测试了Oracle 10g R2。测试结果如下:
原始数据尺寸:1050M
Oracle数据库中尺寸(TPCH01 tablespace):
1420M (建表时没有使用压缩)
1011M (建表时使用了compress选项)
Oracle存放用户数据尺寸是原始数据尺寸的1.35倍 是IQ的2.11倍;即使采用了压缩也比IQ占用的空间大.
注:oracle数据库db_block_size=8K;
show sga输出如下:
Total System Global Area 264241152 bytes
Fixed Size 2020024 bytes
Variable Size 83889480 bytes
Database Buffers 171966464 bytes
Redo Buffers 6365184 bytes
(2) 装载时间情况
下面是在我个人的笔记本(CPU:Intel Core i5 M540 2.53GHz,MEMORY:4GB)上使用vmware虚拟出的Linux虚机(虚机分配的内存为1G)上执行装载时的执行时间数据:
--output
[load part execute time is : 780ms]
200000 row(s) affected
5 row(s) affected
25 row(s) affected
[load supplier execute time is : 102ms]
10000 row(s) affected
[load partsupp execute time is : 2343ms]
800000 row(s) affected
[load customer execute time is : 607ms]
150000 row(s) affected
[load orders execute time is : 4481ms]
1500000 row(s) affected
[load lineitem execute time is : 35090ms]
6001215 row(s) affected
Execution time: 43.575 seconds
可以看到,总共执行时间大约是44秒。对于记录数最大、表最宽的lineitem,装载速度大约17万多行/秒!由此可以看到IQ的简单、较高的压缩能力、较高的性能等特点!
在同样的环境、表定义和数据下,测试了Oracle 10g R2。测试结果如下(以最大的lineitme表为例)
(1) 使用sqlldr工具,dirtect=false
执行时间:00:03:39.67 --3分39秒多
(2) 使用sqlldr工具,dirtect=true
执行时间:00:01:50.84 --1分50秒多
(3) 如果建表使用了compress选项,时间是00:02:30:99。虽然节省了空间,但是装载时间变长!
IQ用时35秒多,只是Oracle的1/3左右!
下面是oracle测试时lineitem表转载命令和sqlldr格式文件:
--lineitem.ctl
load data
infile '/home/oracle/tpchdb/data/lineitem.tbl.1' "str '|\n'"
infile '/home/oracle/tpchdb/data/lineitem.tbl.2' "str '|\n'"
infile '/home/oracle/tpchdb/data/lineitem.tbl.3' "str '|\n'"
infile '/home/oracle/tpchdb/data/lineitem.tbl.4' "str '|\n'"
append
into table lineitem
fields terminated by '|' optionally enclosed by '"'
(
l_orderkey ,
l_partkey ,
l_suppkey ,
l_linenumber ,
l_quantity ,
l_extendedprice,
l_discount ,
l_tax ,
l_returnflag ,
l_linestatus ,
l_shipdate ,
l_commitdate ,
l_receiptdate ,
l_shipinstruct ,
l_shipmode ,
l_comment
)
--使用sqlldr装载lineitem表的命令:
sqlldr userid=tpch_user/tpchpwd control=lineitem.ctl direct=false parallel=false readsize=4096000 bindsize=4096000 rows=10000
sqlldr userid=tpch_user/tpchpwd control=lineitem.ctl direct=true parallel=false readsize=4096000 bindsize=4096000 rows=10000