本篇只是ADS层,其他内容请关注我的博客!在<项目>专栏里!!!

本篇文章参考尚硅谷大数据项目写成!

目录

搭建ADS层

一、设备主题

1.1活跃设备数(日、周、月)

1.2 每日新增设备

1.3留存率

1.4沉默用户数

1.5本周回流用户数

1.6流失用户数

1.7最近连续三周活跃用户数

1.8最近七天内连续三天活跃用户数

二、会员主题

2.1会员信息

2.2漏斗分析

三、商品主题

3.1商品主题

3.2商品营销排名

3.3商品收藏排名

3.4商品加入购物车排名

3.5商品退款率排名(30天)

3.6商品差评率

四、营销主题

4.1下单数目统计

4.2支付信息统计

五、地区主题

六、ADS层数据导入脚本

搭建ADS层

ADS层不涉及建模,建表根据具体需求而定。

一、设备主题

1.1活跃设备数(日、周、月)

需求定义:

日活:当日活跃的设备数

周活:当周活跃的设备数

月活:当月活跃的设备数

1)建表语句

create external table ads_uv_count(

    `dt` string COMMENT '统计日期',

    `day_count` bigint COMMENT '当日用户数量',

    `wk_count` bigint COMMENT '当周用户数量',

    `mn_count` bigint COMMENT '当月用户数量',

    `is_weekend` string COMMENT 'Y,N是否是周末,用于得到本周最终结果',

    `is_monthend` string COMMENT 'Y,N是否是月末,用于得到本月最终结果'

) COMMENT '活跃设备数'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_uv_count/';

2)导入数据

insert into table ads_uv_count

select

'2022-05-20' dt,

daycount.ct,

wkcount.ct,

mncount.ct,

if(date_add(next_day('2022-05-20','MO'),-1)='2022-05-20','Y','N') ,

if(last_day('2022-05-20')='2022-05-20','Y','N')

from

(

select

'2022-05-20' dt,

count(*) ct

from dwt_uv_topic

where login_date_last='2022-05-20'

)daycount join

(

select

'2022-05-20' dt,

count (*) ct

from dwt_uv_topic

where login_date_last>=date_add(next_day('2022-05-20','MO'),-7)

and login_date_last<= date_add(next_day('2022-05-20','MO'),-1)

) wkcount on daycount.dt=wkcount.dt

join

(

select

'2022-05-20' dt,

count (*) ct

from dwt_uv_topic

where date_format(login_date_last,'yyyy-MM')=date_format('2022-05-20','yyyy-MM')

)mncount on daycount.dt=mncount.dt;

3)查询导入结果

select * from ads_uv_count;

1.2 每日新增设备

1)建表语句

create external table ads_new_mid_count

(

`create_date` string comment '创建时间' ,

`new_mid_count` BIGINT comment '新增设备数量'

) COMMENT '每日新增设备数量'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_new_mid_count/';

2)导入数据

insert into table ads_new_mid_count

select

'2022-05-20',

count(*)

from dwt_uv_topic

where login_date_first='2022-05-20';

3)查询导入结果

select * from ads_new_mid_count;

1.3留存率

1)建表语句

create external table ads_user_retention_day_rate

(

`stat_date` string comment '统计日期',

`create_date` string comment '设备新增日期',

`retention_day` int comment '截止当前日期留存天数',

`retention_count` bigint comment '留存数量',

`new_mid_count` bigint comment '设备新增数量',

`retention_ratio` decimal(16,2) comment '留存率'

) COMMENT '留存率'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_user_retention_day_rate/';

2)导入数据

insert into table ads_user_retention_day_rate

select

'2022-05-20',

date_add('2022-05-20',-1),

1,--留存天数

sum(if(login_date_first=date_add('2022-05-20',-1) and login_date_last='2022-05-20',1,0)),

sum(if(login_date_first=date_add('2022-05-20',-1),1,0)),

sum(if(login_date_first=date_add('2022-05-20',-1) and login_date_last='2022-05-20',1,0))/sum(if(login_date_first=date_add('2022-05-20',-1),1,0))*100

from dwt_uv_topic

union all

select

'2022-05-20',

date_add('2022-05-20',-2),

2,

sum(if(login_date_first=date_add('2022-05-20',-2) and login_date_last='2022-05-20',1,0)),

sum(if(login_date_first=date_add('2022-05-20',-2),1,0)),

sum(if(login_date_first=date_add('2022-05-20',-2) and login_date_last='2022-05-20',1,0))/sum(if(login_date_first=date_add('2022-05-20',-2),1,0))*100

from dwt_uv_topic

union all

select

'2022-05-20',

date_add('2022-05-20',-3),

3,

sum(if(login_date_first=date_add('2022-05-20',-3) and login_date_last='2022-05-20',1,0)),

sum(if(login_date_first=date_add('2022-05-20',-3),1,0)),

sum(if(login_date_first=date_add('2022-05-20',-3) and login_date_last='2022-05-20',1,0))/sum(if(login_date_first=date_add('2022-05-20',-3),1,0))*100

from dwt_uv_topic;

3)查询导入结果

select * from ads_user_retention_day_rate;

1.4沉默用户数

1)建表语句

create external table ads_silent_count(

`dt` string COMMENT '统计日期',

`silent_count` bigint COMMENT '沉默设备数'

) COMMENT '沉默用户数'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_silent_count';

2)导入数据(2022-05-20)

insert into table ads_silent_count

select

'2022-05-20',

count(*)

from dwt_uv_topic

where login_date_first=login_date_last

and login_date_last<=date_add('2022-05-20',-7);

3)查询导入结果

select * from ads_silent_count;

1.5本周回流用户数

需求定义:

本周回流用户:上周未活跃,本周活跃的设备,且不是本周新增设备

1)建表语句

create external table ads_back_count(

`dt` string COMMENT '统计日期',

`wk_dt` string COMMENT '统计日期所在周',

`wastage_count` bigint COMMENT '回流设备数'

) COMMENT '本周回流用户数'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_back_count';

2)导入数据

insert into table ads_back_count

select

'2022-05-20',

concat(date_add(next_day('2022-05-20','MO'),-7),'_', date_add(next_day('2022-05-20','MO'),-1)),

count(*)

from

(

select

mid_id

from dwt_uv_topic

where login_date_last>=date_add(next_day('2022-05-20','MO'),-7)

and login_date_last<= date_add(next_day('2022-05-20','MO'),-1)

and login_date_first

)current_wk

left join

(

select

mid_id

from dws_uv_detail_daycount

where dt>=date_add(next_day('2022-05-20','MO'),-7*2)

and dt<= date_add(next_day('2022-05-20','MO'),-7-1)

group by mid_id

)last_wk

on current_wk.mid_id=last_wk.mid_id

where last_wk.mid_id is null;

3)查询导入结果

select * from ads_back_count;

1.6流失用户数

需求定义:

流失用户:最近7天未活跃的设备

1)建表语句

create external table ads_wastage_count(

`dt` string COMMENT '统计日期',

`wastage_count` bigint COMMENT '流失设备数'

) COMMENT '流失用户数'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_wastage_count';

2)导入数据(2022-05-20)

insert into table ads_wastage_count

select

'2022-05-20',

count(*)

from

(

select

mid_id

from dwt_uv_topic

where login_date_last<=date_add('2022-05-20',-7)

group by mid_id

)t1;

3)查询导入结果

select * from ads_wastage_count;

1.7最近连续三周活跃用户数

1)建表语句

create external table ads_continuity_wk_count(

`dt` string COMMENT '统计日期,一般用结束周周日日期,如果每天计算一次,可用当天日期',

`wk_dt` string COMMENT '持续时间',

`continuity_count` bigint COMMENT '活跃用户数'

) COMMENT '最近连续三周活跃用户数'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_continuity_wk_count';

2)导入数据(2022-05-20所在周)

insert into table ads_continuity_wk_count

select

'2022-05-20',

concat(date_add(next_day('2022-05-20','MO'),-7*3),'_',date_add(next_day('2022-05-20','MO'),-1)),

count(*)

from

(

select

mid_id

from

(

select

mid_id

from dws_uv_detail_daycount

where dt>=date_add(next_day('2022-05-20','monday'),-7)

and dt<=date_add(next_day('2022-05-20','monday'),-1)

group by mid_id

union all

select

mid_id

from dws_uv_detail_daycount

where dt>=date_add(next_day('2022-05-20','monday'),-7*2)

and dt<=date_add(next_day('2022-05-20','monday'),-7-1)

group by mid_id

union all

select

mid_id

from dws_uv_detail_daycount

where dt>=date_add(next_day('2022-05-20','monday'),-7*3)

and dt<=date_add(next_day('2022-05-20','monday'),-7*2-1)

group by mid_id

)t1

group by mid_id

having count(*)=3

)t2;

3)查询导入结果

select * from ads_continuity_wk_count;

1.8最近七天内连续三天活跃用户数

1)建表语句

create external table ads_continuity_uv_count(

`dt` string COMMENT '统计日期',

`wk_dt` string COMMENT '最近7天日期',

`continuity_count` bigint

) COMMENT '最近七天内连续三天活跃用户数'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_continuity_uv_count';

2)导入数据

insert into table ads_continuity_uv_count

select

'2022-05-20',

concat(date_add('2022-05-20',-6),'_','2022-05-20'),

count(*)

from

(

select mid_id

from

(

select mid_id

from

(

select

mid_id,

date_sub(dt,rank) date_dif

from

(

select

mid_id,

dt,

rank() over(partition by mid_id order by dt) rank

from dws_uv_detail_daycount

where dt>=date_add('2022-05-20',-6) and dt<='2022-05-20'

)t1

)t2

group by mid_id,date_dif

having count(*)>=3

)t3

group by mid_id

)t4;

3)查询导入结果

select * from ads_continuity_uv_count;

二、会员主题

2.1会员信息

1)建表语句

create external table ads_user_topic(

`dt` string COMMENT '统计日期',

`day_users` string COMMENT '活跃会员数',

`day_new_users` string COMMENT '新增会员数',

`day_new_payment_users` string COMMENT '新增消费会员数',

`payment_users` string COMMENT '总付费会员数',

`users` string COMMENT '总会员数',

`day_users2users` decimal(16,2) COMMENT '会员活跃率',

`payment_users2users` decimal(16,2) COMMENT '会员付费率',

`day_new_users2users` decimal(16,2) COMMENT '会员新鲜度'

) COMMENT '会员信息表'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_user_topic';

2)导入数据

insert into table ads_user_topic

select

'2022-05-20',

sum(if(login_date_last='2022-05-20',1,0)),

sum(if(login_date_first='2022-05-20',1,0)),

sum(if(payment_date_first='2022-05-20',1,0)),

sum(if(payment_count>0,1,0)),

count(*),

sum(if(login_date_last='2022-05-20',1,0))/count(*),

sum(if(payment_count>0,1,0))/count(*),

sum(if(login_date_first='2022-05-20',1,0))/sum(if(login_date_last='2022-05-20',1,0))

from dwt_user_topic;

3)查询导入结果

select * from ads_user_topic;

2.2漏斗分析

统计“浏览首页->浏览商品详情页->加入购物车->下单->支付”的转化率

思路:统计各个行为的人数,然后计算比值。

1)建表语句

create external table ads_user_action_convert_day(

`dt` string COMMENT '统计日期',

`home_count` bigint COMMENT '浏览首页人数',

`good_detail_count` bigint COMMENT '浏览商品详情页人数',

`home2good_detail_convert_ratio` decimal(16,2) COMMENT '首页到商品详情转化率',

`cart_count` bigint COMMENT '加入购物车的人数',

`good_detail2cart_convert_ratio` decimal(16,2) COMMENT '商品详情页到加入购物车转化率',

`order_count` bigint COMMENT '下单人数',

`cart2order_convert_ratio` decimal(16,2) COMMENT '加入购物车到下单转化率',

`payment_amount` bigint COMMENT '支付人数',

`order2payment_convert_ratio` decimal(16,2) COMMENT '下单到支付的转化率'

) COMMENT '漏斗分析'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_user_action_convert_day/';

2)导入数据

with

tmp_uv as

(

select

'2022-05-20' dt,

sum(if(array_contains(pages,'home'),1,0)) home_count,

sum(if(array_contains(pages,'good_detail'),1,0)) good_detail_count

from

(

select

mid_id,

collect_set(page_id) pages

from dwd_page_log

where dt='2022-05-20'

and page_id in ('home','good_detail')

group by mid_id

)tmp

),

tmp_cop as

(

select

'2022-05-20' dt,

sum(if(cart_count>0,1,0)) cart_count,

sum(if(order_count>0,1,0)) order_count,

sum(if(payment_count>0,1,0)) payment_count

from dws_user_action_daycount

where dt='2022-05-20'

)

insert into table ads_user_action_convert_day

select

tmp_uv.dt,

tmp_uv.home_count,

tmp_uv.good_detail_count,

tmp_uv.good_detail_count/tmp_uv.home_count*100,

tmp_cop.cart_count,

tmp_cop.cart_count/tmp_uv.good_detail_count*100,

tmp_cop.order_count,

tmp_cop.order_count/tmp_cop.cart_count*100,

tmp_cop.payment_count,

tmp_cop.payment_count/tmp_cop.order_count*100

from tmp_uv

join tmp_cop

on tmp_uv.dt=tmp_cop.dt;

3)查询导入结果

select * from ads_user_action_convert_day;

三、商品主题

3.1商品主题

1)建表语句

create external table ads_product_info(

`dt` string COMMENT '统计日期',

`sku_num` string COMMENT 'sku个数',

`spu_num` string COMMENT 'spu个数'

) COMMENT '商品个数信息'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_product_info';

2)导入数据

insert into table ads_product_info

select

'2022-05-20' dt,

sku_num,

spu_num

from

(

select

'2022-05-20' dt,

count(*) sku_num

from

dwt_sku_topic

) tmp_sku_num

join

(

select

'2022-05-20' dt,

count(*) spu_num

from

(

select

spu_id

from

dwt_sku_topic

group by

spu_id

) tmp_spu_id

) tmp_spu_num

on tmp_sku_num.dt=tmp_spu_num.dt;

3)查询导入结果

select * from ads_product_info;

 

3.2商品营销排名

1)建表语句

create external table ads_product_sale_topN(

`dt` string COMMENT '统计日期',

`sku_id` string COMMENT '商品ID',

`payment_amount` bigint COMMENT '销量'

) COMMENT '商品销量排名'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_product_sale_topN';

2)导入数据

insert into table ads_product_sale_topN

select

'2022-05-20' dt,

sku_id,

payment_amount

from

dws_sku_action_daycount

where

dt='2022-05-20'

order by payment_amount desc

limit 10;

3)查询导入结果

select * from ads_product_sale_topN;

3.3商品收藏排名

1)建表语句

create external table ads_product_favor_topN(

`dt` string COMMENT '统计日期',

`sku_id` string COMMENT '商品ID',

`favor_count` bigint COMMENT '收藏量'

) COMMENT '商品收藏排名'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_product_favor_topN';

 2)导入数据

insert into table ads_product_favor_topN

select

'2022-05-20' dt,

sku_id,

favor_count

from

dws_sku_action_daycount

where

dt='2022-05-20'

order by favor_count desc

limit 10;

3)查询导入结果

select * from ads_product_favor_topN;

3.4商品加入购物车排名

1)建表语句

create external table ads_product_cart_topN(

`dt` string COMMENT '统计日期',

`sku_id` string COMMENT '商品ID',

`cart_count` bigint COMMENT '加入购物车次数'

) COMMENT '商品加入购物车排名'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_product_cart_topN';

2)导入数据

insert into table ads_product_cart_topN

select

'2022-05-20' dt,

sku_id,

cart_count

from

dws_sku_action_daycount

where

dt='2022-05-20'

order by cart_count desc

limit 10;

3)查询导入结果

select * from ads_product_cart_topN;

3.5商品退款率排名(30天)

1)建表语句

create external table ads_product_refund_topN(

`dt` string COMMENT '统计日期',

`sku_id` string COMMENT '商品ID',

`refund_ratio` decimal(16,2) COMMENT '退款率'

) COMMENT '商品退款率排名'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_product_refund_topN';

2)导入数据

insert into table ads_product_refund_topN

select

'2022-05-20',

sku_id,

refund_last_30d_count/payment_last_30d_count*100 refund_ratio

from dwt_sku_topic

order by refund_ratio desc

limit 10;

3)查询导入结果

select * from ads_product_refund_topN;

3.6商品差评率

1)建表语句

create external table ads_appraise_bad_topN(

`dt` string COMMENT '统计日期',

`sku_id` string COMMENT '商品ID',

`appraise_bad_ratio` decimal(16,2) COMMENT '差评率'

) COMMENT '商品差评率'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_appraise_bad_topN';

2)导入数据

insert into table ads_appraise_bad_topN

select

'2022-05-20' dt,

sku_id,

appraise_bad_count/(appraise_good_count+appraise_mid_count+appraise_bad_count+appraise_default_count) appraise_bad_ratio

from

dws_sku_action_daycount

where

dt='2022-05-20'

order by appraise_bad_ratio desc

limit 10;

3)查询导入结果

select * from ads_appraise_bad_topN;

四、营销主题

4.1下单数目统计

需求分析:统计每日下单数,下单金额及下单用户数。

1)建表语句

create external table ads_order_daycount(

dt string comment '统计日期',

order_count bigint comment '单日下单笔数',

order_amount bigint comment '单日下单金额',

order_users bigint comment '单日下单用户数'

) comment '下单数目统计'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_order_daycount';

2)导入数据

insert into table ads_order_daycount

select

'2022-05-20',

sum(order_count),

sum(order_amount),

sum(if(order_count>0,1,0))

from dws_user_action_daycount

where dt='2022-05-20';

3)查询导入结果

select * from ads_order_daycount;

4.2支付信息统计

1)建表语句

create external table ads_payment_daycount(

dt string comment '统计日期',

order_count bigint comment '单日支付笔数',

order_amount bigint comment '单日支付金额',

payment_user_count bigint comment '单日支付人数',

payment_sku_count bigint comment '单日支付商品数',

payment_avg_time decimal(16,2) comment '下单到支付的平均时长,取分钟数'

) comment '支付信息统计'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_payment_daycount';

2)导入数据

insert into table ads_payment_daycount

select

tmp_payment.dt,

tmp_payment.payment_count,

tmp_payment.payment_amount,

tmp_payment.payment_user_count,

tmp_skucount.payment_sku_count,

tmp_time.payment_avg_time

from

(

select

'2022-05-20' dt,

sum(payment_count) payment_count,

sum(payment_amount) payment_amount,

sum(if(payment_count>0,1,0)) payment_user_count

from dws_user_action_daycount

where dt='2022-05-20'

)tmp_payment

join

(

select

'2022-05-20' dt,

sum(if(payment_count>0,1,0)) payment_sku_count

from dws_sku_action_daycount

where dt='2022-05-20'

)tmp_skucount on tmp_payment.dt=tmp_skucount.dt

join

(

select

'2022-05-20' dt,

sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60 payment_avg_time

from dwd_fact_order_info

where dt='2022-05-20'

and payment_time is not null

)tmp_time on tmp_payment.dt=tmp_time.dt;

3)查询导入结果

select * from ads_payment_daycount;

4.3品牌复购率统计

1)建表语句

create external table ads_sale_tm_category1_stat_mn

(

tm_id string comment '品牌id',

category1_id string comment '1级品类id ',

category1_name string comment '1级品类名称 ',

buycount bigint comment '购买人数',

buy_twice_last bigint comment '两次以上购买人数',

buy_twice_last_ratio decimal(16,2) comment '单次复购率',

buy_3times_last bigint comment '三次以上购买人数',

buy_3times_last_ratio decimal(16,2) comment '多次复购率',

stat_mn string comment '统计月份',

stat_date string comment '统计日期'

) COMMENT '品牌复购率统计'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_sale_tm_category1_stat_mn/';

2)导入数据

with

tmp_order as

(

select

user_id,

order_stats_struct.sku_id sku_id,

order_stats_struct.order_count order_count

from dws_user_action_daycount lateral view explode(order_detail_stats) tmp as order_stats_struct

where date_format(dt,'yyyy-MM')=date_format('2022-05-20','yyyy-MM')

),

tmp_sku as

(

select

id,

tm_id,

category1_id,

category1_name

from dwd_dim_sku_info

where dt='2022-05-20'

)

insert into table ads_sale_tm_category1_stat_mn

select

tm_id,

category1_id,

category1_name,

sum(if(order_count>=1,1,0)) buycount,

sum(if(order_count>=2,1,0)) buyTwiceLast,

sum(if(order_count>=2,1,0))/sum( if(order_count>=1,1,0)) buyTwiceLastRatio,

sum(if(order_count>=3,1,0)) buy3timeLast ,

sum(if(order_count>=3,1,0))/sum( if(order_count>=1,1,0)) buy3timeLastRatio ,

date_format('2022-05-20' ,'yyyy-MM') stat_mn,

'2022-05-20' stat_date

from

(

select

tmp_order.user_id,

tmp_sku.category1_id,

tmp_sku.category1_name,

tmp_sku.tm_id,

sum(order_count) order_count

from tmp_order

join tmp_sku

on tmp_order.sku_id=tmp_sku.id

group by tmp_order.user_id,tmp_sku.category1_id,tmp_sku.category1_name,tmp_sku.tm_id

)tmp

group by tm_id, category1_id, category1_name;

3)查询导入结果

select * from ads_sale_tm_category1_stat_mn;

五、地区主题

5.1地区主题信息

1)建表语句

create external table ads_area_topic(

`dt` string COMMENT '统计日期',

`id` bigint COMMENT '编号',

`province_name` string COMMENT '省份名称',

`area_code` string COMMENT '地区编码',

`iso_code` string COMMENT 'iso编码',

`region_id` string COMMENT '地区ID',

`region_name` string COMMENT '地区名称',

`login_day_count` bigint COMMENT '当天活跃设备数',

`order_day_count` bigint COMMENT '当天下单次数',

`order_day_amount` decimal(16,2) COMMENT '当天下单金额',

`payment_day_count` bigint COMMENT '当天支付次数',

`payment_day_amount` decimal(16,2) COMMENT '当天支付金额'

) COMMENT '地区主题信息'

row format delimited fields terminated by '\t'

location '/warehouse/gmall/ads/ads_area_topic/';

2)导入数据

insert into table ads_area_topic

select

'2022-05-20',

id,

province_name,

area_code,

iso_code,

region_id,

region_name,

login_day_count,

order_day_count,

order_day_amount,

payment_day_count,

payment_day_amount

from dwt_area_topic;

3)查询导入结果

select * from ads_area_topic;

六、ADS层数据导入脚本

vim dwt_to_ads.sh

在脚本中填写如下内容:

#!/bin/bash

hive=/training/hive/bin/hive

APP=default

# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天

if [ -n "$1" ] ;then

do_date=$1

else

do_date=`date -d "-1 day" +%F`

fi

sql="

set mapreduce.job.queuename=default;

insert into table ${APP}.ads_uv_count

select

'$do_date' dt,

daycount.ct,

wkcount.ct,

mncount.ct,

if(date_add(next_day('$do_date','MO'),-1)='$do_date','Y','N') ,

if(last_day('$do_date')='$do_date','Y','N')

from

(

select

'$do_date' dt,

count(*) ct

from ${APP}.dwt_uv_topic

where login_date_last='$do_date'

)daycount join

(

select

'$do_date' dt,

count (*) ct

from ${APP}.dwt_uv_topic

where login_date_last>=date_add(next_day('$do_date','MO'),-7)

and login_date_last<= date_add(next_day('$do_date','MO'),-1)

) wkcount on daycount.dt=wkcount.dt

join

(

select

'$do_date' dt,

count (*) ct

from ${APP}.dwt_uv_topic

where date_format(login_date_last,'yyyy-MM')=date_format('$do_date','yyyy-MM')

)mncount on daycount.dt=mncount.dt;

insert into table ${APP}.ads_new_mid_count

select

login_date_first,

count(*)

from ${APP}.dwt_uv_topic

where login_date_first='$do_date'

group by login_date_first;

insert into table ${APP}.ads_silent_count

select

'$do_date',

count(*)

from ${APP}.dwt_uv_topic

where login_date_first=login_date_last

and login_date_last<=date_add('$do_date',-7);

insert into table ${APP}.ads_back_count

select

'$do_date',

concat(date_add(next_day('$do_date','MO'),-7),'_', date_add(next_day('$do_date','MO'),-1)),

count(*)

from

(

select

mid_id

from ${APP}.dwt_uv_topic

where login_date_last>=date_add(next_day('$do_date','MO'),-7)

and login_date_last<= date_add(next_day('$do_date','MO'),-1)

and login_date_first

)current_wk

left join

(

select

mid_id

from ${APP}.dws_uv_detail_daycount

where dt>=date_add(next_day('$do_date','MO'),-7*2)

and dt<= date_add(next_day('$do_date','MO'),-7-1)

group by mid_id

)last_wk

on current_wk.mid_id=last_wk.mid_id

where last_wk.mid_id is null;

insert into table ${APP}.ads_wastage_count

select

'$do_date',

count(*)

from

(

select

mid_id

from ${APP}.dwt_uv_topic

where login_date_last<=date_add('$do_date',-7)

group by mid_id

)t1;

insert into table ${APP}.ads_user_retention_day_rate

select

'$do_date',--统计日期

date_add('$do_date',-1),--新增日期

1,--留存天数

sum(if(login_date_first=date_add('$do_date',-1) and login_date_last='$do_date',1,0)),--$do_date的1日留存数

sum(if(login_date_first=date_add('$do_date',-1),1,0)),--$do_date新增

sum(if(login_date_first=date_add('$do_date',-1) and login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',-1),1,0))*100

from ${APP}.dwt_uv_topic

union all

select

'$do_date',--统计日期

date_add('$do_date',-2),--新增日期

2,--留存天数

sum(if(login_date_first=date_add('$do_date',-2) and login_date_last='$do_date',1,0)),--$do_date的2日留存数

sum(if(login_date_first=date_add('$do_date',-2),1,0)),--$do_date新增

sum(if(login_date_first=date_add('$do_date',-2) and login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',-2),1,0))*100

from ${APP}.dwt_uv_topic

union all

select

'$do_date',--统计日期

date_add('$do_date',-3),--新增日期

3,--留存天数

sum(if(login_date_first=date_add('$do_date',-3) and login_date_last='$do_date',1,0)),--$do_date的3日留存数

sum(if(login_date_first=date_add('$do_date',-3),1,0)),--$do_date新增

sum(if(login_date_first=date_add('$do_date',-3) and login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',-3),1,0))*100

from ${APP}.dwt_uv_topic;

insert into table ${APP}.ads_continuity_wk_count

select

'$do_date',

concat(date_add(next_day('$do_date','MO'),-7*3),'_',date_add(next_day('$do_date','MO'),-1)),

count(*)

from

(

select

mid_id

from

(

select

mid_id

from ${APP}.dws_uv_detail_daycount

where dt>=date_add(next_day('$do_date','monday'),-7)

and dt<=date_add(next_day('$do_date','monday'),-1)

group by mid_id

union all

select

mid_id

from ${APP}.dws_uv_detail_daycount

where dt>=date_add(next_day('$do_date','monday'),-7*2)

and dt<=date_add(next_day('$do_date','monday'),-7-1)

group by mid_id

union all

select

mid_id

from ${APP}.dws_uv_detail_daycount

where dt>=date_add(next_day('$do_date','monday'),-7*3)

and dt<=date_add(next_day('$do_date','monday'),-7*2-1)

group by mid_id

)t1

group by mid_id

having count(*)=3

)t2;

insert into table ${APP}.ads_continuity_uv_count

select

'$do_date',

concat(date_add('$do_date',-6),'_','$do_date'),

count(*)

from

(

select mid_id

from

(

select mid_id

from

(

select

mid_id,

date_sub(dt,rank) date_dif

from

(

select

mid_id,

dt,

rank() over(partition by mid_id order by dt) rank

from ${APP}.dws_uv_detail_daycount

where dt>=date_add('$do_date',-6) and dt<='$do_date'

)t1

)t2

group by mid_id,date_dif

having count(*)>=3

)t3

group by mid_id

)t4;

insert into table ${APP}.ads_user_topic

select

'$do_date',

sum(if(login_date_last='$do_date',1,0)),

sum(if(login_date_first='$do_date',1,0)),

sum(if(payment_date_first='$do_date',1,0)),

sum(if(payment_count>0,1,0)),

count(*),

sum(if(login_date_last='$do_date',1,0))/count(*),

sum(if(payment_count>0,1,0))/count(*),

sum(if(login_date_first='$do_date',1,0))/sum(if(login_date_last='$do_date',1,0))

from ${APP}.dwt_user_topic;

with

tmp_uv as

(

select

'$do_date' dt,

sum(if(array_contains(pages,'home'),1,0)) home_count,

sum(if(array_contains(pages,'good_detail'),1,0)) good_detail_count

from

(

select

mid_id,

collect_set(page_id) pages

from ${APP}.dwd_page_log

where dt='$do_date'

and page_id in ('home','good_detail')

group by mid_id

)tmp

),

tmp_cop as

(

select

'$do_date' dt,

sum(if(cart_count>0,1,0)) cart_count,

sum(if(order_count>0,1,0)) order_count,

sum(if(payment_count>0,1,0)) payment_count

from ${APP}.dws_user_action_daycount

where dt='$do_date'

)

insert into table ${APP}.ads_user_action_convert_day

select

tmp_uv.dt,

tmp_uv.home_count,

tmp_uv.good_detail_count,

tmp_uv.good_detail_count/tmp_uv.home_count*100,

tmp_cop.cart_count,

tmp_cop.cart_count/tmp_uv.good_detail_count*100,

tmp_cop.order_count,

tmp_cop.order_count/tmp_cop.cart_count*100,

tmp_cop.payment_count,

tmp_cop.payment_count/tmp_cop.order_count*100

from tmp_uv

join tmp_cop

on tmp_uv.dt=tmp_cop.dt;

insert into table ${APP}.ads_product_info

select

'$do_date' dt,

sku_num,

spu_num

from

(

select

'$do_date' dt,

count(*) sku_num

from

${APP}.dwt_sku_topic

) tmp_sku_num

join

(

select

'$do_date' dt,

count(*) spu_num

from

(

select

spu_id

from

${APP}.dwt_sku_topic

group by

spu_id

) tmp_spu_id

) tmp_spu_num

on

tmp_sku_num.dt=tmp_spu_num.dt;

insert into table ${APP}.ads_product_sale_topN

select

'$do_date' dt,

sku_id,

payment_amount

from

${APP}.dws_sku_action_daycount

where

dt='$do_date'

order by payment_amount desc

limit 10;

insert into table ${APP}.ads_product_favor_topN

select

'$do_date' dt,

sku_id,

favor_count

from

${APP}.dws_sku_action_daycount

where

dt='$do_date'

order by favor_count desc

limit 10;

insert into table ${APP}.ads_product_cart_topN

select

'$do_date' dt,

sku_id,

cart_count

from

${APP}.dws_sku_action_daycount

where

dt='$do_date'

order by cart_count desc

limit 10;

insert into table ${APP}.ads_product_refund_topN

select

'$do_date',

sku_id,

refund_last_30d_count/payment_last_30d_count*100 refund_ratio

from ${APP}.dwt_sku_topic

order by refund_ratio desc

limit 10;

insert into table ${APP}.ads_appraise_bad_topN

select

'$do_date' dt,

sku_id,

appraise_bad_count/(appraise_good_count+appraise_mid_count+appraise_bad_count+appraise_default_count) appraise_bad_ratio

from

${APP}.dws_sku_action_daycount

where

dt='$do_date'

order by appraise_bad_ratio desc

limit 10;

insert into table ${APP}.ads_order_daycount

select

'$do_date',

sum(order_count),

sum(order_amount),

sum(if(order_count>0,1,0))

from ${APP}.dws_user_action_daycount

where dt='$do_date';

insert into table ${APP}.ads_payment_daycount

select

tmp_payment.dt,

tmp_payment.payment_count,

tmp_payment.payment_amount,

tmp_payment.payment_user_count,

tmp_skucount.payment_sku_count,

tmp_time.payment_avg_time

from

(

select

'$do_date' dt,

sum(payment_count) payment_count,

sum(payment_amount) payment_amount,

sum(if(payment_count>0,1,0)) payment_user_count

from ${APP}.dws_user_action_daycount

where dt='$do_date'

)tmp_payment

join

(

select

'$do_date' dt,

sum(if(payment_count>0,1,0)) payment_sku_count

from ${APP}.dws_sku_action_daycount

where dt='$do_date'

)tmp_skucount on tmp_payment.dt=tmp_skucount.dt

join

(

select

'$do_date' dt,

sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60 payment_avg_time

from ${APP}.dwd_fact_order_info

where dt='$do_date'

and payment_time is not null

)tmp_time on tmp_payment.dt=tmp_time.dt;

with

tmp_order as

(

select

user_id,

order_stats_struct.sku_id sku_id,

order_stats_struct.order_count order_count

from ${APP}.dws_user_action_daycount lateral view explode(order_detail_stats) tmp as order_stats_struct

where date_format(dt,'yyyy-MM')=date_format('$do_date','yyyy-MM')

),

tmp_sku as

(

select

id,

tm_id,

category1_id,

category1_name

from ${APP}.dwd_dim_sku_info

where dt='$do_date'

)

insert into table ${APP}.ads_sale_tm_category1_stat_mn

select

tm_id,

category1_id,

category1_name,

sum(if(order_count>=1,1,0)) buycount,

sum(if(order_count>=2,1,0)) buyTwiceLast,

sum(if(order_count>=2,1,0))/sum( if(order_count>=1,1,0)) buyTwiceLastRatio,

sum(if(order_count>=3,1,0)) buy3timeLast ,

sum(if(order_count>=3,1,0))/sum( if(order_count>=1,1,0)) buy3timeLastRatio ,

date_format('$do_date' ,'yyyy-MM') stat_mn,

'$do_date' stat_date

from

(

select

tmp_order.user_id,

tmp_sku.category1_id,

tmp_sku.category1_name,

tmp_sku.tm_id,

sum(order_count) order_count

from tmp_order

join tmp_sku

on tmp_order.sku_id=tmp_sku.id

group by tmp_order.user_id,tmp_sku.category1_id,tmp_sku.category1_name,tmp_sku.tm_id

)tmp

group by tm_id, category1_id, category1_name;

insert into table ${APP}.ads_area_topic

select

'$do_date',

id,

province_name,

area_code,

iso_code,

region_id,

region_name,

login_day_count,

order_day_count,

order_day_amount,

payment_day_count,

payment_day_amount

from ${APP}.dwt_area_topic;

"

$hive -e "$sql"

2)增加脚本执行权限 chmod 777 dwt_to_ads.sh

3)执行脚本导入数据 dwt_to_ads.sh 2022-05-21

4)查看导入数据

select * from ads_uv_count;

select * from ads_new_mid_count;

select * from ads_silent_count;

select * from ads_back_count';

select * from ads_wastage_count;

select * from ads_user_retention_day_rate;

select * from ads_continuity_wk_count';

select * from ads_continuity_uv_count;

select * from ads_user_topic where dt='2020-06-15';

select * from ads_user_action_convert_day;

select * from ads_product_info;

select * from ads_product_sale_topN;

select * from ads_product_favor_topN;

select * from ads_product_cart_topN;

select * from ads_product_refund_topN;

select * from ads_appraise_bad_topN;

select * from ads_order_daycount;

select * from ads_payment_daycount;

select * from ads_sale_tm_category1_stat_mn;

select * from ads_area_topic';

完成!!!

作者水平低,如有错误,恳请指正!谢谢!!!!!

本篇文章参考尚硅谷大数据项目写成!

相关链接

评论可见,请评论后查看内容,谢谢!!!评论后请刷新页面。