hive向表格中插入数据并分析语句

1,---导入mds_imei_month_info

set hive.exec.max.dynamic.partitions= ; //最大的动态分区表
set hive.support.concurrency=false; //是否支持并发
set hive.exec.max.dynamic.partitions.pernode= ; //each mapper or reducer可以创建的最大动态分区数
set hive.exec.dynamic.partition.mode=nonstrict; //strict是避免全分区字段是动态的,必须有至少一个分区字段是指定有值的
insert into table mds_imei_month_info partition(month)
select imei_p,dt,cnt ,month from
(
select imei_p ,month,dt,cnt from
(select imei_p,'' as month,sum(pow(,(dt-))) as dt,sum(cnt) cnt //将自下面取出的dt进行指数的转换,pow(2,(dt-1))表示2的dt-1次方,cnt表示imei在这个月出现的次数
(
select imei_p,cast(substring(dt,,) as int) as dt,count(*) cnt from mds_engine_basic where dt>= and dt<= and length(dt)= group by imei_p,dt //dt一共8位数,从第7位数开始的2位数取出,既01到31
) a group by imei_p)a where length(imei_p)>= and regexp_extract(imei_p,'([a-z,,.,A-Z,0-9,_,\\-]*)',)=imei_p //这是对imei进行正则匹配
)a;

2,---导入mds_ip_month_info

set hive.exec.max.dynamic.partitions= ;
set hive.support.concurrency=false;
set hive.exec.max.dynamic.partitions.pernode= ;
set hive.exec.dynamic.partition.mode=nonstrict;
insert into table mds_ip_month_info partition(month)
select user_ip,country,province,city,longtitude,latitude,isp,dt,cnt ,month from
(
select user_ip ,month,dt,cnt,country,city,province,latitude,longtitude,isp from
(select user_ip,'' as month,sum(pow(,(dt-))) as dt,sum(cnt) cnt,country,city,province,latitude,longtitude,isp
from(
select user_ip,cast(substring(dt,,) as int) as dt,count(*) cnt,ipaddressquery(,user_ip) country,
ipaddressquery(,user_ip) province,ipaddressquery(,user_ip) city, split(ipaddressquery(,user_ip),',')[] longtitude ,
split(ipaddressquery(,user_ip),',')[] latitude,
ipaddressquery(,user_ip) isp from mds_engine_basic where dt>= and dt<= and length(dt)= and user_ip not like '%,%'
and split(user_ip,',')[] like '%.%.%' and regexp_extract( split(user_ip,',')[],'\.([0-9]{0,7})\.([0-9]{0,7})\.([0-9]{0,7})\.([0-9]{0,7})',)= split(user_ip,',')[] and split(user_ip,'\\.')[]<
group by user_ip,dt,ipaddressquery(,user_ip) ,
ipaddressquery(,user_ip) ,ipaddressquery(,user_ip) , split(ipaddressquery(,user_ip),',')[] ,
split(ipaddressquery(,user_ip),',')[] ,
ipaddressquery(,user_ip)) a group by user_ip,country,city,province,latitude,longtitude,isp)a
)a ;

3,---导入mds_id_month_info

set hive.exec.max.dynamic.partitions= ;
set hive.support.concurrency=false;
set hive.exec.max.dynamic.partitions.pernode= ;
set hive.exec.dynamic.partition.mode=nonstrict;
insert into table mds_id_month_info partition(month)
select id,dt,cnt ,month from
(
select id ,month,dt,cnt from
(select id,'' as month,sum(pow(,(dt-))) as dt,sum(cnt) cnt
from
(
select id,cast(substring(dt,,) as int) as dt,count(*) cnt from mds_engine_basic where dt>= and dt<= and length(dt)= group by id,dt
) a group by id)a where length(id)= and regexp_extract(id,'([a-z,,.,A-Z,0-9,_,\\-]*)',)=id
)a;

4,---导入mds_bssid_month_info

set hive.exec.max.dynamic.partitions= ;
set hive.support.concurrency=false;
set hive.exec.max.dynamic.partitions.pernode= ;
set hive.exec.dynamic.partition.mode=nonstrict;
insert into table mds_bssid_month_info partition(month)
select bssid,dt,cnt ,month from
(
select bssid ,month,dt,cnt from
(select bssid,'' as month,sum(pow(,(dt-))) as dt,sum(cnt) cnt
from
(
select bssid,cast(substring(dt,,) as int) as dt,count(*) cnt from mds_engine_wifi where dt>= and dt<= and length(dt)= group by bssid,dt
) a group by bssid)a where length(bssid)>=
)a;
上一篇:Objective-C语言Foundation框架


下一篇:jstl-将List中的数据展示到表格中