insert into table us_mac_stat partition(day='20170619') select mobile_user_id, c from (select mobile_user_id, count(distinct mac) c from default.mobile_request_log_orc where day_id='20170619' and mobile_user_id<>'' and mobile_user_id is not null and mac<>'' and mac is not null group by (mobile_user_id) t where t.c>1;
insert into table us_ip_stat partition(day='20170621',h) select mobile_user_id, c, hour_id from ( select mobile_user_id, count(distinct ip) c , hour_id from default.mobile_request_log_orc where day_id='20170619' and mobile_user_id<>'' and mobile_user_id is not null and ip<>'' and ip is not null group by mobile_user_id,hour_id) t where t.c>2;
from ...
inster into..
inster into ..
合并
from ( select mobile_user_id,ip,mac,hour_id from default.mobile_request_log_orc where day_id='20170619' and mobile_user_id<>'' and mobile_user_id is not null) publicf
insert into table us_mac_stat partition(day='20170619') select mobile_user_id, count(distinct mac) c where mac<>'' and mac is not null group by mobile_user_id having count(distinct mac)>1
insert into table us_ip_stat partition(day='20170619',h) select mobile_user_id, count(distinct ip) cip , hour_id where ip<>'' and ip is not null group by mobile_user_id,hour_id having count(distinct ip)>2;
执行后,发现h分区里一个个文件都很小,分件数量很多,需要手动设置一下参数
set mapred.reduce.tasks = 50;
根据自己的数据量大小调整
阅读(4556) | 评论(0) | 转发(0) |