1. 启动 ./mongod --dbpath tools/mongodb-linux-x86_64-1.4.0/data/d1/
1.1 原始待分析 - 数据 at=2010-04-13 00:01:21<|>ao=...<|>xx=........
2.0 说明 -
main_mong.py ( pymongo ) 进行数据导入
+- 依赖
model.conf
配置文件
+- 依赖 utils.py 工具包
-
user_stat.js (新老用户)
实际统计 脚本
+- mongo 内部环境 运行
- run_mong.sh , run_ma_mong.sh 数据导入脚本
2.0.1 流程 : 。定时任务 crontab (shell+python) 导入 mongodb 数据
。编辑统计分析脚本 在 stat/ 下写 mongo 可运行的 js
。在前台(java + mongo client java ) eval 运行 ,展现
* > load('/data/shell/gmodel/dw_model/stat/user_stat.js') ;
* > use dw
* > getUser("100008","2010-04-11","2010-04-12",db.data);
{ "allUser" : 5151, "oldUser" : 3936 }
2.0.2 目录结构.
|-- conf
|   `-- model.conf
|-- data_cleaning
|-- main_mong.py
|-- nohup.out
|-- run_ma_mong.sh
|-- run_mong.sh
|-- stat
|   `-- user_stat.js
`-- utils
    |-- __init__.py
    |-- utils.py
|
2.1 model.conf
[logSou]
#数据收集 配置出 分析数据 一般 接受都为 cat path2/* path/file
# path2/* path/file 在此配置
result_stat=/data/games/co_answer*/@Y/@M/@D/*
audiobooks_logSou2=/data/tongji/iphone_app/*/app*/@Y/@M/@D/*
audiobooks_logSou2_hour=/data/tongji/iphone_app/*/app*/@Y/@M/@D/*-@H
ab_imusic=/data/tongji/iphone_app/imusic/app*/@Y/@M/@D/*
ab_ibook=/data/tongji/iphone_app/ibook/app*/@Y/@M/@D/*
ab_itools=/data/tongji/iphone_app/itools/app*/@Y/@M/@D/*
。。。。
|
2.2 utils.py
#encoding: utf-8
import ConfigParser
import MySQLdb
import time,os,sys,re
# Module-wide parser, populated once at import time from the deployed
# model.conf file.
config = ConfigParser.RawConfigParser()
config.read('/data/shell/gmodel/dw_model/conf/model.conf')


def getConn():
    """Open and return a new MySQL connection.

    Every connection parameter is read from the [db] section of the
    module-level ``config`` parser each time this is called.
    """
    return MySQLdb.connect(
        host=config.get('db', 'host'),
        port=config.getint('db', 'port'),
        user=config.get('db', 'user'),
        passwd=config.get('db', 'passwd'),
        db=config.get('db', 'database'),
        charset=config.get('db', 'charset')
    )
def getAgoday(x=1):
    """Return the local calendar date ``x`` days ago as 'YYYY-MM-DD'.

    ``x`` may be an int or a numeric string; it defaults to 1 (yesterday).
    Computed in-process instead of spawning ``date -d "$[x*-1] day"``:
    the ``$[...]`` arithmetic form is a deprecated bash extension that a
    POSIX /bin/sh does not expand, in which case the old code returned ''.
    """
    import datetime
    target = datetime.date.today() - datetime.timedelta(days=int(x))
    return target.strftime('%Y-%m-%d')


def getAgohour(x=1):
    """Return the local hour of day ``x`` hours ago, zero-padded ('00'-'23').

    ``x`` may be an int or a numeric string; it defaults to 1.
    """
    return time.strftime('%H', time.localtime(time.time() - int(x) * 3600))
def getLog(logname='default.log'):
cof=getConfig()
runlogpath = cof.get('env', 'runlog')
print "log file => ",runlogpath
if not os.path.exists(runlogpath):
os.makedirs(runlogpath)
if cof.get('run','runprint')=='file':
runlog = open(runlogpath+'/'+logname,'aw')
else :
runlog=sys.stdout
print runlog
return runlog
class getConfig:
    """Date-aware accessor for the module-level ``config`` parser.

    String values fetched through ``get`` have the placeholders @Y, @M, @D
    and @H replaced by the year, month, day and hour this object was built
    for, so path templates in model.conf can be written once per log source.
    """

    def __init__(self, ttime=None, hhour=None):
        """Bind the accessor to a date and hour.

        ttime -- date string in 'YYYY-MM-DD' form; defaults to getAgoday().
        hhour -- hour string substituted for @H; defaults to getAgohour().

        The defaults are resolved on each call. Computing them in the
        signature would evaluate them once, when the class is defined, and
        every later instance would reuse that stale date and hour.
        """
        if ttime is None:
            ttime = getAgoday()
        if hhour is None:
            hhour = getAgohour()
        self._time = ttime
        self._hour = hhour
        parts = ttime.split('-')
        # Placeholder -> replacement text used by get().
        self.rp = {
            "@Y": parts[0],
            "@M": parts[1],
            "@D": parts[2],
            "@H": hhour,
        }

    def get(self, group, key):
        """Return config value group/key with all placeholders expanded."""
        ss = config.get(group, key)
        for placeholder, value in self.rp.items():
            ss = ss.replace(placeholder, value)
        return ss

    def getint(self, group, key):
        """Return config value group/key as an int (no placeholder expansion)."""
        return config.getint(group, key)

    def getTime(self):
        """Return the date string this accessor was built for."""
        return self._time

    def getHour(self):
        """Return the hour string this accessor was built for."""
        return self._hour
|
2.3 main_mong.py
#!/bin/python
#encoding: utf-8
import pymongo
import utils.utils as utils
import data_cleaning.data_cleaning as dataCleaning
import sys,traceback,os,time
# Build the config object. An optional second command-line argument is
# passed to getConfig as the date to import; without it getConfig falls
# back to its own defaults.
if len(sys.argv) >= 3:
    cf = utils.getConfig(sys.argv[2])
else:
    cf = utils.getConfig()

# The first command-line argument names a key in the [logSou] section of
# model.conf; its value is the file pattern handed to `cat`.
load_file_path = sys.argv[1]
cat_data = 'cat ' + cf.get('logSou', load_file_path)

connection = pymongo.Connection("localhost", 27017)
db = connection.dw

log_pipe = os.popen(cat_data)
try:
    for row in log_pipe:
        # Rows containing "aop=t" are skipped and never stored.
        if "aop=t" in row:
            continue
        tmp = {}
        # A row is a list of key=value fields separated by "<|>".
        for cc in row.split('\n')[0].split('<|>'):
            cd = cc.split('=')
            # Fields that do not split into exactly one key and one value
            # are ignored.
            if len(cd) == 2:
                tmp[cd[0]] = cd[1]
        db.data.save(tmp)
except Exception:
    # Report the failure but let the script finish. Unlike a bare
    # `except:`, this does not swallow KeyboardInterrupt or SystemExit.
    traceback.print_exc(limit=2)
finally:
    # Always release the `cat` pipe, even after an error.
    log_pipe.close()
|
2.4 run_mong.sh
echo ""
echo "-----------------------------------------------------------------------------"
# Date to import (YYYY-MM-DD); defaults to yesterday when no argument is given.
ttime=$1
if [ -z "$ttime" ]; then
    ttime=$(date -d "-1 day" +%Y-%m-%d)
fi

# Import each log source for that date, one after another, in the original
# order. Arguments are quoted so a value containing whitespace is passed
# through as a single argument.
# (The older "main.py iphone_logSou" import remains disabled.)
for source in result_stat ab_imusic ab_ibook ab_itools iphone_logSou_2 audiobooks_logSou2; do
    python /data/shell/gmodel/dw_model/main_mong.py "$source" "$ttime"
done
|
2.5 run_ma_mong.sh
# Run the daily import script for one date ($1, YYYY-MM-DD) and print an
# end marker followed by two blank lines.
action() {
    # Keep the date local so it does not leak into the caller's variables.
    local ttime=$1
    sh /data/shell/gmodel/dw_model/run_mong.sh "$ttime"
    echo "$ttime end ...."
    echo ""
    echo ""
}
# Usage: run_ma_mong.sh START_DATE END_DATE
# Runs action for every day from START_DATE to END_DATE, both inclusive.
# Example values: START_DATE=2009-09-11 END_DATE=2009-12-10
if [ -z "$1" ] || [ -z "$2" ]; then
    echo "usage: $0 START_DATE END_DATE (YYYY-MM-DD)" >&2
    exit 1
fi

# Normalise both bounds to zero-padded YYYY-MM-DD so the equality test that
# ends the loop can actually match; abort if either date cannot be parsed.
st=$(date -d "$1" +%Y-%m-%d) || exit 1
et=$(date -d "$2" +%Y-%m-%d) || exit 1

# A start date after the end date would never reach the stop condition.
if [ "$(date -d "$st" +%s)" -gt "$(date -d "$et" +%s)" ]; then
    echo "start date $st is after end date $et" >&2
    exit 1
fi

ii=0
while true
do
    _ttime=$(date -d "$st $ii day" +%Y-%m-%d)
    ii=$((ii + 1))
    action "$_ttime"
    if [ "$_ttime" = "$et" ]; then
        break
    fi
done
|
2.6 user_stat.js
//
//db.data.ensureIndex({"auid":1});
//db.data.ensureIndex({"at":1});
//db.data.ensureIndex({"ab":1});
//db.data.ensureIndex({"aop":1});
/**
 * Count the users active in a time window and how many of them are returning.
 *
 * A user is "active" when at least one record with the given `ab`, with
 * `aop` equal to "a", has `at` strictly between `st` and `et`. An active
 * user is "old" when at least one record with the same `ab`/`aop`/`auid`
 * has `at` earlier than `st`.
 *
 * @param ab  value matched against the `ab` field
 * @param st  lower bound for `at` (exclusive)
 * @param et  upper bound for `at` (exclusive)
 * @param dc  collection to query, e.g. db.data
 * @returns {{allUser: number, oldUser: number}}
 */
function getUser(ab, st, et, dc) {
    var activeIds = dc.distinct("auid", { "ab": ab, "aop": "a", "at": { $gt: st, $lt: et } });
    var oldUsers = 0;
    activeIds.forEach(function (uid) {
        // Let the server apply the "seen before st" test and stop at the
        // first match, instead of pulling every record of the user and
        // filtering on the client.
        var earlier = dc.findOne({ "ab": ab, "aop": "a", "auid": uid, "at": { $lt: st } });
        if (earlier) {
            oldUsers++;
        }
    });
    return { "allUser": activeIds.length, "oldUser": oldUsers };
}
|
阅读(2845) | 评论(0) | 转发(0) |