#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
参考 :
# 创建存储空间
$> /tmp/sql_by_log.py ' create table test( at text,ab text,auid text,pv real ,UNIQUE (at,ab,auid) ) '
# 插入数据
# sqlite3 DUPLICATE KEY
$> /tmp/sql_by_log.py '
insert OR IGNORE into test values ( strftime("%Y-%m-%d","#at#") ,"#ab#","#auid#",1);
UPDATE test SET pv = pv + 1 WHERE at=strftime("%Y-%m-%d","#at#") and auid="#auid#" and ab="#ab#" ;
' 'cat /data/tongji/iphone_app/imusic/app*/2010/09/20/*'
> 耗时
insert_num = 65809 , insert_err_num = 2666
real 0m53.873s
user 0m6.624s
sys 0m0.148s
# 查询展现 用户去重复
$> time /tmp/sql_by_log.py ' select count( distinct auid ) from test '
> 2683
$> time cat /data/tongji/iphone_app/imusic/app*/2010/09/20/* |perl -nle ' print $1 if /auid=(.*?)</ ' |sort -u |wc -l
> 2684
# 简单查询 用户时长
$> time /tmp/sql_by_log.py '
select auid , strftime("%s",max(at)) -strftime("%s", min(at)) as ht from test group by auid order by ht desc ;
'
'''
import sys,os,re
import sqlite3
import traceback
if __name__ == "__main__":
run_dir,run_scriptfile = os.path.split( __file__ )
run_scriptfilename,run_scriptfileext = os.path.splitext( run_scriptfile )
db_sqlite_file = os.path.join( run_dir,"."+run_scriptfilename+".db" )
conn = sqlite3.connect( db_sqlite_file )
c = conn.cursor()
if len(sys.argv)==2 :
# sql = create table ; select
sfile,sql = sys.argv
c.execute(sql)
for row in c:
tmp_row = ""
for col in row : tmp_row += "%s\t" % col
print tmp_row
# Save (commit) the changes
conn.commit()
elif len(sys.argv)==3 :
# sql = insert by data_file
sfile,insert_sql,comm_popen = sys.argv
insert_num,insert_err_num,row_num = 0,0,0
err_row_num_arr = []
for row in os.popen( comm_popen ) :
t_insert_sql = insert_sql
row_num += 1
try :
tmp = {}
for cc in row.split('\n')[0].split('<|>'):
cd=cc.split('=')
if len(cd)==2 : tmp[cd[0]]=cd[1]
for kk in set( re.findall("#(.*?)#",t_insert_sql ) ) :
t_insert_sql = t_insert_sql.replace( "#"+kk+"#",tmp[kk] )
for tsql in t_insert_sql.split(";") :
c.execute( tsql )
insert_num += 1
except :
insert_err_num += 1
err_row_num_arr.append( str(row_num) )
#exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
#traceback.print_exception(exceptionType, exceptionValue, exceptionTraceback,limit=2)
if row_num % 100 == 0 : conn.commit()
else : conn.commit()
print " insert_num = %d , insert_err_num = %d " % (insert_num,insert_err_num)
#print " error_num = [ " + ",".join( err_row_num_arr ) + " ]"
# We can also close the cursor if we are done with it
c.close()
|