Chinaunix首页 | 论坛 | 博客
  • 博客访问: 656964
  • 博文数量: 149
  • 博客积分: 3901
  • 博客等级: 中校
  • 技术积分: 1558
  • 用 户 组: 普通用户
  • 注册时间: 2009-02-16 14:33
文章分类

全部博文(149)

文章存档

2014年(2)

2013年(10)

2012年(32)

2011年(21)

2010年(84)

分类:

2010-09-21 18:24:59


代码上 :



#!/usr/bin/python
# -*- coding: utf-8 -*-

'''
参考 :
    
    
    


# 创建存储空间
$> /tmp/sql_by_log.py '
create table test( at text,ab text,auid text,pv real ,UNIQUE (at,ab,auid) ) '

# 插入数据
# sqlite3 DUPLICATE KEY
$> /tmp/sql_by_log.py '

         insert OR IGNORE into test values ( strftime("%Y-%m-%d","#at#") ,"#ab#","#auid#",1);
         UPDATE test SET pv = pv + 1 WHERE at=strftime("%Y-%m-%d","#at#") and auid="#auid#" and ab="#ab#" ;
' 'cat /data/tongji/iphone_app/imusic/app*/2010/09/20/*'
 > 耗时
    insert_num = 65809 , insert_err_num = 2666

    real 0m53.873s
    user 0m6.624s
    sys 0m0.148s
                       

# 查询展现 用户去重复
$> time /tmp/sql_by_log.py '
select count( distinct auid ) from test '
 > 2683
$> time cat /data/tongji/iphone_app/imusic/app*/2010/09/20/* |perl -nle '
print $1 if /auid=(.*?)</ ' |sort -u |wc -l
 > 2684


# 简单查询 用户时长
$> time /tmp/sql_by_log.py '

   select auid , strftime("%s",max(at)) -strftime("%s", min(at)) as ht from test group by auid order by ht desc ;
'



'
''

import sys,os,re
import sqlite3
import traceback


if __name__ == "__main__":
    run_dir,run_scriptfile = os.path.split( __file__ )
    run_scriptfilename,run_scriptfileext = os.path.splitext( run_scriptfile )
    db_sqlite_file = os.path.join( run_dir,"."+run_scriptfilename+".db" )

    conn = sqlite3.connect( db_sqlite_file )
    c = conn.cursor()

    if len(sys.argv)==2 :
        # sql = create table ; select
        sfile,sql = sys.argv
        c.execute(sql)
        for row in c:
            tmp_row = ""
            for col in row : tmp_row += "%s\t" % col
            print tmp_row
        # Save (commit) the changes
        conn.commit()
    elif len(sys.argv)==3 :
        # sql = insert by data_file
        sfile,insert_sql,comm_popen = sys.argv
        insert_num,insert_err_num,row_num = 0,0,0
        err_row_num_arr = []
        for row in os.popen( comm_popen ) :
            t_insert_sql = insert_sql
            row_num += 1
            try :
                tmp = {}
                for cc in row.split('\n')[0].split('<|>'):
                    cd=cc.split('=')
                    if len(cd)==2 : tmp[cd[0]]=cd[1]
                for kk in set( re.findall("#(.*?)#",t_insert_sql ) ) :
                    t_insert_sql = t_insert_sql.replace( "#"+kk+"#",tmp[kk] )
                for tsql in t_insert_sql.split(";") :
                   c.execute( tsql )
                insert_num += 1
            except :
                insert_err_num += 1
                err_row_num_arr.append( str(row_num) )
                #exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
                #traceback.print_exception(exceptionType, exceptionValue, exceptionTraceback,limit=2)

            if row_num % 100 == 0 : conn.commit()
        else : conn.commit()
        print " insert_num = %d , insert_err_num = %d " % (insert_num,insert_err_num)
        #print " error_num = [ " + ",".join( err_row_num_arr ) + " ]"
    # We can also close the cursor if we are done with it
    c.close()


阅读(1052) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~