本篇只是ADS层,其他内容请关注我的博客!在<项目>专栏里!!!
本篇文章参考尚硅谷大数据项目写成!
目录
搭建ADS层
一、设备主题
1.1活跃设备数(日、周、月)
1.2 每日新增设备
1.3留存率
1.4沉默用户数
1.5本周回流用户数
1.6流失用户数
1.7最近连续三周活跃用户数
1.8最近七天内连续三天活跃用户数
二、会员主题
2.1会员信息
2.2漏斗分析
三、商品主题
3.1商品主题
3.2商品营销排名
3.3商品收藏排名
3.4商品加入购物车排名
3.5商品退款率排名(30天)
3.6商品差评率
四、营销主题
4.1下单数目统计
4.2支付信息统计
五、地区主题
六、ADS层数据导入脚本
搭建ADS层
ADS层不涉及建模,建表根据具体需求而定。
一、设备主题
1.1活跃设备数(日、周、月)
需求定义:
日活:当日活跃的设备数
周活:当周活跃的设备数
月活:当月活跃的设备数
1)建表语句
create external table ads_uv_count(
`dt` string COMMENT '统计日期',
`day_count` bigint COMMENT '当日用户数量',
`wk_count` bigint COMMENT '当周用户数量',
`mn_count` bigint COMMENT '当月用户数量',
`is_weekend` string COMMENT 'Y,N是否是周末,用于得到本周最终结果',
`is_monthend` string COMMENT 'Y,N是否是月末,用于得到本月最终结果'
) COMMENT '活跃设备数'
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_uv_count/';
2)导入数据
insert into table ads_uv_count
select
'2022-05-20' dt,
daycount.ct,
wkcount.ct,
mncount.ct,
if(date_add(next_day('2022-05-20','MO'),-1)='2022-05-20','Y','N') ,
if(last_day('2022-05-20')='2022-05-20','Y','N')
from
(
select
'2022-05-20' dt,
count(*) ct
from dwt_uv_topic
where login_date_last='2022-05-20'
)daycount join
(
select
'2022-05-20' dt,
count (*) ct
from dwt_uv_topic
where login_date_last>=date_add(next_day('2022-05-20','MO'),-7)
and login_date_last<= date_add(next_day('2022-05-20','MO'),-1)
) wkcount on daycount.dt=wkcount.dt
join
(
select
'2022-05-20' dt,
count (*) ct
from dwt_uv_topic
where date_format(login_date_last,'yyyy-MM')=date_format('2022-05-20','yyyy-MM')
)mncount on daycount.dt=mncount.dt;
3)查询导入结果
select * from ads_uv_count;
1.2 每日新增设备
1)建表语句
create external table ads_new_mid_count
(
`create_date` string comment '创建时间' ,
`new_mid_count` BIGINT comment '新增设备数量'
) COMMENT '每日新增设备数量'
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_new_mid_count/';
2)导入数据
insert into table ads_new_mid_count
select
'2022-05-20',
count(*)
from dwt_uv_topic
where login_date_first='2022-05-20';
3)查询导入结果
select * from ads_new_mid_count;
1.3留存率
1)建表语句
create external table ads_user_retention_day_rate
(
`stat_date` string comment '统计日期',
`create_date` string comment '设备新增日期',
`retention_day` int comment '截止当前日期留存天数',
`retention_count` bigint comment '留存数量',
`new_mid_count` bigint comment '设备新增数量',
`retention_ratio` decimal(16,2) comment '留存率'
) COMMENT '留存率'
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_user_retention_day_rate/';
2)导入数据
insert into table ads_user_retention_day_rate
select
'2022-05-20',
date_add('2022-05-20',-1),
1,--留存天数
sum(if(login_date_first=date_add('2022-05-20',-1) and login_date_last='2022-05-20',1,0)),
sum(if(login_date_first=date_add('2022-05-20',-1),1,0)),
sum(if(login_date_first=date_add('2022-05-20',-1) and login_date_last='2022-05-20',1,0))/sum(if(login_date_first=date_add('2022-05-20',-1),1,0))*100
from dwt_uv_topic
union all
select
'2022-05-20',
date_add('2022-05-20',-2),
2,
sum(if(login_date_first=date_add('2022-05-20',-2) and login_date_last='2022-05-20',1,0)),
sum(if(login_date_first=date_add('2022-05-20',-2),1,0)),
sum(if(login_date_first=date_add('2022-05-20',-2) and login_date_last='2022-05-20',1,0))/sum(if(login_date_first=date_add('2022-05-20',-2),1,0))*100
from dwt_uv_topic
union all
select
'2022-05-20',
date_add('2022-05-20',-3),
3,
sum(if(login_date_first=date_add('2022-05-20',-3) and login_date_last='2022-05-20',1,0)),
sum(if(login_date_first=date_add('2022-05-20',-3),1,0)),
sum(if(login_date_first=date_add('2022-05-20',-3) and login_date_last='2022-05-20',1,0))/sum(if(login_date_first=date_add('2022-05-20',-3),1,0))*100
from dwt_uv_topic;
3)查询导入结果
select * from ads_user_retention_day_rate;
1.4沉默用户数
1)建表语句
create external table ads_silent_count(
`dt` string COMMENT '统计日期',
`silent_count` bigint COMMENT '沉默设备数'
) COMMENT '沉默用户数'
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_silent_count';
2)导入数据(2022-05-20)
insert into table ads_silent_count
select
'2022-05-20',
count(*)
from dwt_uv_topic
where login_date_first=login_date_last
and login_date_last<=date_add('2022-05-20',-7);
3)查询导入结果
select * from ads_silent_count;
1.5本周回流用户数
需求定义:
本周回流用户:上周未活跃,本周活跃的设备,且不是本周新增设备
1)建表语句
create external table ads_back_count(
`dt` string COMMENT '统计日期',
`wk_dt` string COMMENT '统计日期所在周',
`wastage_count` bigint COMMENT '回流设备数'
) COMMENT '本周回流用户数'
row format delimited fields terminated by '\t'
location '/warehouse/gmall/ads/ads_back_count';
2)导入数据
insert into table ads_back_count
select
'2022-05-20',
concat(date_add(next_day('2022-05-20','MO'),-7),'_', date_add(next_day('2022-05-20','MO'),-1)),
count(*)
from
(
select
mid_id
from dwt_uv_topic
where login_date_last>=date_add(next_day('2022-05-20','MO'),-7)
and login_date_last<= date_add(next_day('2022-05-20','MO'),-1)
and login_date_first )current_wk left join ( select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('2022-05-20','MO'),-7*2) and dt<= date_add(next_day('2022-05-20','MO'),-7-1) group by mid_id )last_wk on current_wk.mid_id=last_wk.mid_id where last_wk.mid_id is null; 3)查询导入结果 select * from ads_back_count; 1.6流失用户数 需求定义: 流失用户:最近7天未活跃的设备 1)建表语句 create external table ads_wastage_count( `dt` string COMMENT '统计日期', `wastage_count` bigint COMMENT '流失设备数' ) COMMENT '流失用户数' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_wastage_count'; 2)导入数据(2022-05-20) insert into table ads_wastage_count select '2022-05-20', count(*) from ( select mid_id from dwt_uv_topic where login_date_last<=date_add('2022-05-20',-7) group by mid_id )t1; 3)查询导入结果 select * from ads_wastage_count; 1.7最近连续三周活跃用户数 1)建表语句 create external table ads_continuity_wk_count( `dt` string COMMENT '统计日期,一般用结束周周日日期,如果每天计算一次,可用当天日期', `wk_dt` string COMMENT '持续时间', `continuity_count` bigint COMMENT '活跃用户数' ) COMMENT '最近连续三周活跃用户数' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_continuity_wk_count'; 2)导入数据(2022-05-20所在周) insert into table ads_continuity_wk_count select '2022-05-20', concat(date_add(next_day('2022-05-20','MO'),-7*3),'_',date_add(next_day('2022-05-20','MO'),-1)), count(*) from ( select mid_id from ( select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('2022-05-20','monday'),-7) and dt<=date_add(next_day('2022-05-20','monday'),-1) group by mid_id union all select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('2022-05-20','monday'),-7*2) and dt<=date_add(next_day('2022-05-20','monday'),-7-1) group by mid_id union all select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('2022-05-20','monday'),-7*3) and dt<=date_add(next_day('2022-05-20','monday'),-7*2-1) group by mid_id )t1 group by mid_id having count(*)=3 )t2; 3)查询导入结果 select * from ads_continuity_wk_count; 1.8最近七天内连续三天活跃用户数 1)建表语句 create external table ads_continuity_uv_count( `dt` string COMMENT '统计日期', `wk_dt` string COMMENT '最近7天日期', `continuity_count` bigint ) COMMENT '最近七天内连续三天活跃用户数' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_continuity_uv_count'; 2)导入数据 insert into table ads_continuity_uv_count select '2022-05-20', concat(date_add('2022-05-20',-6),'_','2022-05-20'), count(*) from ( select mid_id from ( select mid_id from ( select mid_id, date_sub(dt,rank) date_dif from ( select mid_id, dt, rank() over(partition by mid_id order by dt) rank from dws_uv_detail_daycount where dt>=date_add('2022-05-20',-6) and dt<='2022-05-20' )t1 )t2 group by mid_id,date_dif having count(*)>=3 )t3 group by mid_id )t4; 3)查询导入结果 select * from ads_continuity_uv_count; 二、会员主题 2.1会员信息 1)建表语句 create external table ads_user_topic( `dt` string COMMENT '统计日期', `day_users` string COMMENT '活跃会员数', `day_new_users` string COMMENT '新增会员数', `day_new_payment_users` string COMMENT '新增消费会员数', `payment_users` string COMMENT '总付费会员数', `users` string COMMENT '总会员数', `day_users2users` decimal(16,2) COMMENT '会员活跃率', `payment_users2users` decimal(16,2) COMMENT '会员付费率', `day_new_users2users` decimal(16,2) COMMENT '会员新鲜度' ) COMMENT '会员信息表' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_user_topic'; 2)导入数据 insert into table ads_user_topic select '2022-05-20', sum(if(login_date_last='2022-05-20',1,0)), sum(if(login_date_first='2022-05-20',1,0)), sum(if(payment_date_first='2022-05-20',1,0)), sum(if(payment_count>0,1,0)), count(*), sum(if(login_date_last='2022-05-20',1,0))/count(*), sum(if(payment_count>0,1,0))/count(*), sum(if(login_date_first='2022-05-20',1,0))/sum(if(login_date_last='2022-05-20',1,0)) from dwt_user_topic; 3)查询导入结果 select * from ads_user_topic; 2.2漏斗分析 统计“浏览首页->浏览商品详情页->加入购物车->下单->支付”的转化率 思路:统计各个行为的人数,然后计算比值。 1)建表语句 create external table ads_user_action_convert_day( `dt` string COMMENT '统计日期', `home_count` bigint COMMENT '浏览首页人数', `good_detail_count` bigint COMMENT '浏览商品详情页人数', `home2good_detail_convert_ratio` decimal(16,2) COMMENT '首页到商品详情转化率', `cart_count` bigint COMMENT '加入购物车的人数', `good_detail2cart_convert_ratio` decimal(16,2) COMMENT '商品详情页到加入购物车转化率', `order_count` bigint COMMENT '下单人数', `cart2order_convert_ratio` decimal(16,2) COMMENT '加入购物车到下单转化率', `payment_amount` bigint COMMENT '支付人数', `order2payment_convert_ratio` decimal(16,2) COMMENT '下单到支付的转化率' ) COMMENT '漏斗分析' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_user_action_convert_day/'; 2)导入数据 with tmp_uv as ( select '2022-05-20' dt, sum(if(array_contains(pages,'home'),1,0)) home_count, sum(if(array_contains(pages,'good_detail'),1,0)) good_detail_count from ( select mid_id, collect_set(page_id) pages from dwd_page_log where dt='2022-05-20' and page_id in ('home','good_detail') group by mid_id )tmp ), tmp_cop as ( select '2022-05-20' dt, sum(if(cart_count>0,1,0)) cart_count, sum(if(order_count>0,1,0)) order_count, sum(if(payment_count>0,1,0)) payment_count from dws_user_action_daycount where dt='2022-05-20' ) insert into table ads_user_action_convert_day select tmp_uv.dt, tmp_uv.home_count, tmp_uv.good_detail_count, tmp_uv.good_detail_count/tmp_uv.home_count*100, tmp_cop.cart_count, tmp_cop.cart_count/tmp_uv.good_detail_count*100, tmp_cop.order_count, tmp_cop.order_count/tmp_cop.cart_count*100, tmp_cop.payment_count, tmp_cop.payment_count/tmp_cop.order_count*100 from tmp_uv join tmp_cop on tmp_uv.dt=tmp_cop.dt; 3)查询导入结果 select * from ads_user_action_convert_day; 三、商品主题 3.1商品主题 1)建表语句 create external table ads_product_info( `dt` string COMMENT '统计日期', `sku_num` string COMMENT 'sku个数', `spu_num` string COMMENT 'spu个数' ) COMMENT '商品个数信息' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_product_info'; 2)导入数据 insert into table ads_product_info select '2022-05-20' dt, sku_num, spu_num from ( select '2022-05-20' dt, count(*) sku_num from dwt_sku_topic ) tmp_sku_num join ( select '2022-05-20' dt, count(*) spu_num from ( select spu_id from dwt_sku_topic group by spu_id ) tmp_spu_id ) tmp_spu_num on tmp_sku_num.dt=tmp_spu_num.dt; 3)查询导入结果 select * from ads_product_info; 3.2商品营销排名 1)建表语句 create external table ads_product_sale_topN( `dt` string COMMENT '统计日期', `sku_id` string COMMENT '商品ID', `payment_amount` bigint COMMENT '销量' ) COMMENT '商品销量排名' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_product_sale_topN'; 2)导入数据 insert into table ads_product_sale_topN select '2022-05-20' dt, sku_id, payment_amount from dws_sku_action_daycount where dt='2022-05-20' order by payment_amount desc limit 10; 3)查询导入结果 select * from ads_product_sale_topN; 3.3商品收藏排名 1)建表语句 create external table ads_product_favor_topN( `dt` string COMMENT '统计日期', `sku_id` string COMMENT '商品ID', `favor_count` bigint COMMENT '收藏量' ) COMMENT '商品收藏排名' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_product_favor_topN'; 2)导入数据 insert into table ads_product_favor_topN select '2022-05-20' dt, sku_id, favor_count from dws_sku_action_daycount where dt='2022-05-20' order by favor_count desc limit 10; 3)查询导入结果 select * from ads_product_favor_topN; 3.4商品加入购物车排名 1)建表语句 create external table ads_product_cart_topN( `dt` string COMMENT '统计日期', `sku_id` string COMMENT '商品ID', `cart_count` bigint COMMENT '加入购物车次数' ) COMMENT '商品加入购物车排名' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_product_cart_topN'; 2)导入数据 insert into table ads_product_cart_topN select '2022-05-20' dt, sku_id, cart_count from dws_sku_action_daycount where dt='2022-05-20' order by cart_count desc limit 10; 3)查询导入结果 select * from ads_product_cart_topN; 3.5商品退款率排名(30天) 1)建表语句 create external table ads_product_refund_topN( `dt` string COMMENT '统计日期', `sku_id` string COMMENT '商品ID', `refund_ratio` decimal(16,2) COMMENT '退款率' ) COMMENT '商品退款率排名' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_product_refund_topN'; 2)导入数据 insert into table ads_product_refund_topN select '2022-05-20', sku_id, refund_last_30d_count/payment_last_30d_count*100 refund_ratio from dwt_sku_topic order by refund_ratio desc limit 10; 3)查询导入结果 select * from ads_product_refund_topN; 3.6商品差评率 1)建表语句 create external table ads_appraise_bad_topN( `dt` string COMMENT '统计日期', `sku_id` string COMMENT '商品ID', `appraise_bad_ratio` decimal(16,2) COMMENT '差评率' ) COMMENT '商品差评率' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_appraise_bad_topN'; 2)导入数据 insert into table ads_appraise_bad_topN select '2022-05-20' dt, sku_id, appraise_bad_count/(appraise_good_count+appraise_mid_count+appraise_bad_count+appraise_default_count) appraise_bad_ratio from dws_sku_action_daycount where dt='2022-05-20' order by appraise_bad_ratio desc limit 10; 3)查询导入结果 select * from ads_appraise_bad_topN; 四、营销主题 4.1下单数目统计 需求分析:统计每日下单数,下单金额及下单用户数。 1)建表语句 create external table ads_order_daycount( dt string comment '统计日期', order_count bigint comment '单日下单笔数', order_amount bigint comment '单日下单金额', order_users bigint comment '单日下单用户数' ) comment '下单数目统计' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_order_daycount'; 2)导入数据 insert into table ads_order_daycount select '2022-05-20', sum(order_count), sum(order_amount), sum(if(order_count>0,1,0)) from dws_user_action_daycount where dt='2022-05-20'; 3)查询导入结果 select * from ads_order_daycount; 4.2支付信息统计 1)建表语句 create external table ads_payment_daycount( dt string comment '统计日期', order_count bigint comment '单日支付笔数', order_amount bigint comment '单日支付金额', payment_user_count bigint comment '单日支付人数', payment_sku_count bigint comment '单日支付商品数', payment_avg_time decimal(16,2) comment '下单到支付的平均时长,取分钟数' ) comment '支付信息统计' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_payment_daycount'; 2)导入数据 insert into table ads_payment_daycount select tmp_payment.dt, tmp_payment.payment_count, tmp_payment.payment_amount, tmp_payment.payment_user_count, tmp_skucount.payment_sku_count, tmp_time.payment_avg_time from ( select '2022-05-20' dt, sum(payment_count) payment_count, sum(payment_amount) payment_amount, sum(if(payment_count>0,1,0)) payment_user_count from dws_user_action_daycount where dt='2022-05-20' )tmp_payment join ( select '2022-05-20' dt, sum(if(payment_count>0,1,0)) payment_sku_count from dws_sku_action_daycount where dt='2022-05-20' )tmp_skucount on tmp_payment.dt=tmp_skucount.dt join ( select '2022-05-20' dt, sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60 payment_avg_time from dwd_fact_order_info where dt='2022-05-20' and payment_time is not null )tmp_time on tmp_payment.dt=tmp_time.dt; 3)查询导入结果 select * from ads_payment_daycount; 4.3品牌复购率统计 1)建表语句 create external table ads_sale_tm_category1_stat_mn ( tm_id string comment '品牌id', category1_id string comment '1级品类id ', category1_name string comment '1级品类名称 ', buycount bigint comment '购买人数', buy_twice_last bigint comment '两次以上购买人数', buy_twice_last_ratio decimal(16,2) comment '单次复购率', buy_3times_last bigint comment '三次以上购买人数', buy_3times_last_ratio decimal(16,2) comment '多次复购率', stat_mn string comment '统计月份', stat_date string comment '统计日期' ) COMMENT '品牌复购率统计' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_sale_tm_category1_stat_mn/'; 2)导入数据 with tmp_order as ( select user_id, order_stats_struct.sku_id sku_id, order_stats_struct.order_count order_count from dws_user_action_daycount lateral view explode(order_detail_stats) tmp as order_stats_struct where date_format(dt,'yyyy-MM')=date_format('2022-05-20','yyyy-MM') ), tmp_sku as ( select id, tm_id, category1_id, category1_name from dwd_dim_sku_info where dt='2022-05-20' ) insert into table ads_sale_tm_category1_stat_mn select tm_id, category1_id, category1_name, sum(if(order_count>=1,1,0)) buycount, sum(if(order_count>=2,1,0)) buyTwiceLast, sum(if(order_count>=2,1,0))/sum( if(order_count>=1,1,0)) buyTwiceLastRatio, sum(if(order_count>=3,1,0)) buy3timeLast , sum(if(order_count>=3,1,0))/sum( if(order_count>=1,1,0)) buy3timeLastRatio , date_format('2022-05-20' ,'yyyy-MM') stat_mn, '2022-05-20' stat_date from ( select tmp_order.user_id, tmp_sku.category1_id, tmp_sku.category1_name, tmp_sku.tm_id, sum(order_count) order_count from tmp_order join tmp_sku on tmp_order.sku_id=tmp_sku.id group by tmp_order.user_id,tmp_sku.category1_id,tmp_sku.category1_name,tmp_sku.tm_id )tmp group by tm_id, category1_id, category1_name; 3)查询导入结果 select * from ads_sale_tm_category1_stat_mn; 五、地区主题 5.1地区主题信息 1)建表语句 create external table ads_area_topic( `dt` string COMMENT '统计日期', `id` bigint COMMENT '编号', `province_name` string COMMENT '省份名称', `area_code` string COMMENT '地区编码', `iso_code` string COMMENT 'iso编码', `region_id` string COMMENT '地区ID', `region_name` string COMMENT '地区名称', `login_day_count` bigint COMMENT '当天活跃设备数', `order_day_count` bigint COMMENT '当天下单次数', `order_day_amount` decimal(16,2) COMMENT '当天下单金额', `payment_day_count` bigint COMMENT '当天支付次数', `payment_day_amount` decimal(16,2) COMMENT '当天支付金额' ) COMMENT '地区主题信息' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_area_topic/'; 2)导入数据 insert into table ads_area_topic select '2022-05-20', id, province_name, area_code, iso_code, region_id, region_name, login_day_count, order_day_count, order_day_amount, payment_day_count, payment_day_amount from dwt_area_topic; 3)查询导入结果 select * from ads_area_topic; 六、ADS层数据导入脚本 vim dwt_to_ads.sh 在脚本中填写如下内容: #!/bin/bash hive=/training/hive/bin/hive APP=default # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天 if [ -n "$1" ] ;then do_date=$1 else do_date=`date -d "-1 day" +%F` fi sql=" set mapreduce.job.queuename=default; insert into table ${APP}.ads_uv_count select '$do_date' dt, daycount.ct, wkcount.ct, mncount.ct, if(date_add(next_day('$do_date','MO'),-1)='$do_date','Y','N') , if(last_day('$do_date')='$do_date','Y','N') from ( select '$do_date' dt, count(*) ct from ${APP}.dwt_uv_topic where login_date_last='$do_date' )daycount join ( select '$do_date' dt, count (*) ct from ${APP}.dwt_uv_topic where login_date_last>=date_add(next_day('$do_date','MO'),-7) and login_date_last<= date_add(next_day('$do_date','MO'),-1) ) wkcount on daycount.dt=wkcount.dt join ( select '$do_date' dt, count (*) ct from ${APP}.dwt_uv_topic where date_format(login_date_last,'yyyy-MM')=date_format('$do_date','yyyy-MM') )mncount on daycount.dt=mncount.dt; insert into table ${APP}.ads_new_mid_count select login_date_first, count(*) from ${APP}.dwt_uv_topic where login_date_first='$do_date' group by login_date_first; insert into table ${APP}.ads_silent_count select '$do_date', count(*) from ${APP}.dwt_uv_topic where login_date_first=login_date_last and login_date_last<=date_add('$do_date',-7); insert into table ${APP}.ads_back_count select '$do_date', concat(date_add(next_day('$do_date','MO'),-7),'_', date_add(next_day('$do_date','MO'),-1)), count(*) from ( select mid_id from ${APP}.dwt_uv_topic where login_date_last>=date_add(next_day('$do_date','MO'),-7) and login_date_last<= date_add(next_day('$do_date','MO'),-1) and login_date_first )current_wk left join ( select mid_id from ${APP}.dws_uv_detail_daycount where dt>=date_add(next_day('$do_date','MO'),-7*2) and dt<= date_add(next_day('$do_date','MO'),-7-1) group by mid_id )last_wk on current_wk.mid_id=last_wk.mid_id where last_wk.mid_id is null; insert into table ${APP}.ads_wastage_count select '$do_date', count(*) from ( select mid_id from ${APP}.dwt_uv_topic where login_date_last<=date_add('$do_date',-7) group by mid_id )t1; insert into table ${APP}.ads_user_retention_day_rate select '$do_date',--统计日期 date_add('$do_date',-1),--新增日期 1,--留存天数 sum(if(login_date_first=date_add('$do_date',-1) and login_date_last='$do_date',1,0)),--$do_date的1日留存数 sum(if(login_date_first=date_add('$do_date',-1),1,0)),--$do_date新增 sum(if(login_date_first=date_add('$do_date',-1) and login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',-1),1,0))*100 from ${APP}.dwt_uv_topic union all select '$do_date',--统计日期 date_add('$do_date',-2),--新增日期 2,--留存天数 sum(if(login_date_first=date_add('$do_date',-2) and login_date_last='$do_date',1,0)),--$do_date的2日留存数 sum(if(login_date_first=date_add('$do_date',-2),1,0)),--$do_date新增 sum(if(login_date_first=date_add('$do_date',-2) and login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',-2),1,0))*100 from ${APP}.dwt_uv_topic union all select '$do_date',--统计日期 date_add('$do_date',-3),--新增日期 3,--留存天数 sum(if(login_date_first=date_add('$do_date',-3) and login_date_last='$do_date',1,0)),--$do_date的3日留存数 sum(if(login_date_first=date_add('$do_date',-3),1,0)),--$do_date新增 sum(if(login_date_first=date_add('$do_date',-3) and login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',-3),1,0))*100 from ${APP}.dwt_uv_topic; insert into table ${APP}.ads_continuity_wk_count select '$do_date', concat(date_add(next_day('$do_date','MO'),-7*3),'_',date_add(next_day('$do_date','MO'),-1)), count(*) from ( select mid_id from ( select mid_id from ${APP}.dws_uv_detail_daycount where dt>=date_add(next_day('$do_date','monday'),-7) and dt<=date_add(next_day('$do_date','monday'),-1) group by mid_id union all select mid_id from ${APP}.dws_uv_detail_daycount where dt>=date_add(next_day('$do_date','monday'),-7*2) and dt<=date_add(next_day('$do_date','monday'),-7-1) group by mid_id union all select mid_id from ${APP}.dws_uv_detail_daycount where dt>=date_add(next_day('$do_date','monday'),-7*3) and dt<=date_add(next_day('$do_date','monday'),-7*2-1) group by mid_id )t1 group by mid_id having count(*)=3 )t2; insert into table ${APP}.ads_continuity_uv_count select '$do_date', concat(date_add('$do_date',-6),'_','$do_date'), count(*) from ( select mid_id from ( select mid_id from ( select mid_id, date_sub(dt,rank) date_dif from ( select mid_id, dt, rank() over(partition by mid_id order by dt) rank from ${APP}.dws_uv_detail_daycount where dt>=date_add('$do_date',-6) and dt<='$do_date' )t1 )t2 group by mid_id,date_dif having count(*)>=3 )t3 group by mid_id )t4; insert into table ${APP}.ads_user_topic select '$do_date', sum(if(login_date_last='$do_date',1,0)), sum(if(login_date_first='$do_date',1,0)), sum(if(payment_date_first='$do_date',1,0)), sum(if(payment_count>0,1,0)), count(*), sum(if(login_date_last='$do_date',1,0))/count(*), sum(if(payment_count>0,1,0))/count(*), sum(if(login_date_first='$do_date',1,0))/sum(if(login_date_last='$do_date',1,0)) from ${APP}.dwt_user_topic; with tmp_uv as ( select '$do_date' dt, sum(if(array_contains(pages,'home'),1,0)) home_count, sum(if(array_contains(pages,'good_detail'),1,0)) good_detail_count from ( select mid_id, collect_set(page_id) pages from ${APP}.dwd_page_log where dt='$do_date' and page_id in ('home','good_detail') group by mid_id )tmp ), tmp_cop as ( select '$do_date' dt, sum(if(cart_count>0,1,0)) cart_count, sum(if(order_count>0,1,0)) order_count, sum(if(payment_count>0,1,0)) payment_count from ${APP}.dws_user_action_daycount where dt='$do_date' ) insert into table ${APP}.ads_user_action_convert_day select tmp_uv.dt, tmp_uv.home_count, tmp_uv.good_detail_count, tmp_uv.good_detail_count/tmp_uv.home_count*100, tmp_cop.cart_count, tmp_cop.cart_count/tmp_uv.good_detail_count*100, tmp_cop.order_count, tmp_cop.order_count/tmp_cop.cart_count*100, tmp_cop.payment_count, tmp_cop.payment_count/tmp_cop.order_count*100 from tmp_uv join tmp_cop on tmp_uv.dt=tmp_cop.dt; insert into table ${APP}.ads_product_info select '$do_date' dt, sku_num, spu_num from ( select '$do_date' dt, count(*) sku_num from ${APP}.dwt_sku_topic ) tmp_sku_num join ( select '$do_date' dt, count(*) spu_num from ( select spu_id from ${APP}.dwt_sku_topic group by spu_id ) tmp_spu_id ) tmp_spu_num on tmp_sku_num.dt=tmp_spu_num.dt; insert into table ${APP}.ads_product_sale_topN select '$do_date' dt, sku_id, payment_amount from ${APP}.dws_sku_action_daycount where dt='$do_date' order by payment_amount desc limit 10; insert into table ${APP}.ads_product_favor_topN select '$do_date' dt, sku_id, favor_count from ${APP}.dws_sku_action_daycount where dt='$do_date' order by favor_count desc limit 10; insert into table ${APP}.ads_product_cart_topN select '$do_date' dt, sku_id, cart_count from ${APP}.dws_sku_action_daycount where dt='$do_date' order by cart_count desc limit 10; insert into table ${APP}.ads_product_refund_topN select '$do_date', sku_id, refund_last_30d_count/payment_last_30d_count*100 refund_ratio from ${APP}.dwt_sku_topic order by refund_ratio desc limit 10; insert into table ${APP}.ads_appraise_bad_topN select '$do_date' dt, sku_id, appraise_bad_count/(appraise_good_count+appraise_mid_count+appraise_bad_count+appraise_default_count) appraise_bad_ratio from ${APP}.dws_sku_action_daycount where dt='$do_date' order by appraise_bad_ratio desc limit 10; insert into table ${APP}.ads_order_daycount select '$do_date', sum(order_count), sum(order_amount), sum(if(order_count>0,1,0)) from ${APP}.dws_user_action_daycount where dt='$do_date'; insert into table ${APP}.ads_payment_daycount select tmp_payment.dt, tmp_payment.payment_count, tmp_payment.payment_amount, tmp_payment.payment_user_count, tmp_skucount.payment_sku_count, tmp_time.payment_avg_time from ( select '$do_date' dt, sum(payment_count) payment_count, sum(payment_amount) payment_amount, sum(if(payment_count>0,1,0)) payment_user_count from ${APP}.dws_user_action_daycount where dt='$do_date' )tmp_payment join ( select '$do_date' dt, sum(if(payment_count>0,1,0)) payment_sku_count from ${APP}.dws_sku_action_daycount where dt='$do_date' )tmp_skucount on tmp_payment.dt=tmp_skucount.dt join ( select '$do_date' dt, sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60 payment_avg_time from ${APP}.dwd_fact_order_info where dt='$do_date' and payment_time is not null )tmp_time on tmp_payment.dt=tmp_time.dt; with tmp_order as ( select user_id, order_stats_struct.sku_id sku_id, order_stats_struct.order_count order_count from ${APP}.dws_user_action_daycount lateral view explode(order_detail_stats) tmp as order_stats_struct where date_format(dt,'yyyy-MM')=date_format('$do_date','yyyy-MM') ), tmp_sku as ( select id, tm_id, category1_id, category1_name from ${APP}.dwd_dim_sku_info where dt='$do_date' ) insert into table ${APP}.ads_sale_tm_category1_stat_mn select tm_id, category1_id, category1_name, sum(if(order_count>=1,1,0)) buycount, sum(if(order_count>=2,1,0)) buyTwiceLast, sum(if(order_count>=2,1,0))/sum( if(order_count>=1,1,0)) buyTwiceLastRatio, sum(if(order_count>=3,1,0)) buy3timeLast , sum(if(order_count>=3,1,0))/sum( if(order_count>=1,1,0)) buy3timeLastRatio , date_format('$do_date' ,'yyyy-MM') stat_mn, '$do_date' stat_date from ( select tmp_order.user_id, tmp_sku.category1_id, tmp_sku.category1_name, tmp_sku.tm_id, sum(order_count) order_count from tmp_order join tmp_sku on tmp_order.sku_id=tmp_sku.id group by tmp_order.user_id,tmp_sku.category1_id,tmp_sku.category1_name,tmp_sku.tm_id )tmp group by tm_id, category1_id, category1_name; insert into table ${APP}.ads_area_topic select '$do_date', id, province_name, area_code, iso_code, region_id, region_name, login_day_count, order_day_count, order_day_amount, payment_day_count, payment_day_amount from ${APP}.dwt_area_topic; " $hive -e "$sql" 2)增加脚本执行权限 chmod 777 dwt_to_ads.sh 3)执行脚本导入数据 dwt_to_ads.sh 2022-05-21 4)查看导入数据 select * from ads_uv_count; select * from ads_new_mid_count; select * from ads_silent_count; select * from ads_back_count'; select * from ads_wastage_count; select * from ads_user_retention_day_rate; select * from ads_continuity_wk_count'; select * from ads_continuity_uv_count; select * from ads_user_topic where dt='2020-06-15'; select * from ads_user_action_convert_day; select * from ads_product_info; select * from ads_product_sale_topN; select * from ads_product_favor_topN; select * from ads_product_cart_topN; select * from ads_product_refund_topN; select * from ads_appraise_bad_topN; select * from ads_order_daycount; select * from ads_payment_daycount; select * from ads_sale_tm_category1_stat_mn; select * from ads_area_topic'; 完成!!! 作者水平低,如有错误,恳请指正!谢谢!!!!! 本篇文章参考尚硅谷大数据项目写成! 相关链接
发表评论