Data warehouse project order analysis time dimension analysis business development
Summary requirement 1: ads data are from DW, DW data are from ods. Ads one table stores all data of four requirements
Prepare date data
drop table if exists `itcast_dw`.`dim_date`; CREATE TABLE `itcast_dw`.`dim_date`( `date_key` string, `date_value` string, `day_in_year` string, `day_in_month` string, `is_first_day_in_month` string, `is_last_day_in_month` string, `weekday` string, `week_in_month` string, `is_first_day_in_week` string, `is_dayoff` string, `is_workday` string, `is_holiday` string, `date_type` string, `month_number` string, `year` string, `year_month_number` string, `quarter_name` string, `quarter_number` string, `year_quarter` string) STORED AS PARQUET;
1.1 demand analysis
Statistics of sales orders and total orders of each quarter in 2019
Statistics of the number of sales orders and total orders of each month in 2019
Statistics of sales orders and total orders from Monday to Sunday in 2019
Statistics of the number of orders and total orders of national legal holidays, rest days and working days in 2019
1.2 create ads layer data table
drop table if exists
itcast_ads
.ads_order_date
;
create tableitcast_ads
.ads_order_date
(
date_type string, – Time dimension type
date_val string, – Time dimension value
order_count bigint, – Number of orders
order_money double – Order transaction amount
)
partitioned by (dt string)
STORED AS PARQUET;
1.3 create dw data table
DROP TABLE IF EXISTS
itcast_dw
.fact_order_goods
;
CREATE TABLEitcast_dw
.fact_order_goods
(ogid
bigint,orderid
bigint,goodsid
bigint,goodsnum
bigint,goodsprice
double,goodsspecid
bigint,goodsspecnames
string,goodsname
string,goodsimg
string,extrajson
string,goodstype
bigint,commissionrate
double,goodscode
string,createtime
string)
partitioned by (dt string)
STORED AS PARQUET;
1.4 data loading
1 load order detail data to dw layer
Load all data from January 1 to September 9 to dw layer
insert overwrite table
itcast_dw
.fact_order_goods
partition(dt='20190908')
selectogid
,orderid
,goodsid
,goodsnum
,goodsprice
,goodsspecid
,goodsspecnames
,goodsname
,goodsimg
,extrajson
,goodstype
,commissionrate
,goodscode
,createtime
fromitcast_ods
.itcast_order_goods
where dt='20190908';
– Test code
select * fromitcast_dw
.fact_order_goods
where dt = '20190908' limit 10;
2. Count the number of sales orders and total orders of each quarter in the half year period
Time dimension type:
1 → quarter
2 → monthly
3 → week
4 → date type (holidays, rest days, working days)
Count the number of sales orders and total orders of each quarter in 2019
insert overwrite table
itcast_ads
.ads_order_date
partition(dt='20190908')
select
'1' as date_type, – Time dimension type, 1 for quarter
t1.year_quarter as date_val, – Quarter name
count(distinct t2.orderid), – Total orders
sum(t2.goodsprice*t2.goodsnum) – Order total
fromitcast_dw
.dim_date
t1
left join
(select * fromitcast_dw
.fact_order_goods
where dt='20190908') t2
on t1.date_value = substr(createtime, 1, 10)
group by t1. year_quarter
order by t1.year_quarter;
– test
select * fromitcast_ads
.ads_order_date
where dt='20190908' and date_type = '1';
3. Count the number of sales orders and total orders of each month in 2019
insert into table
itcast_ads
.ads_order_date
partition(dt='20190908')
select
'2' as date_type, --2 Represents month type
t1.year_month_number as date_val,
count(distinct t2.orderid), – Total orders
sum(t2.goodsprice*t2.goodsnum) – Order total
fromitcast_dw
.dim_date
t1
left join
(select * fromitcast_dw
.fact_order_goods
where dt <='20190908' and dt >= '20190101') t2
on t1.date_value = substr(createtime, 1, 10)
group by
t1.year_month_number
order by t1.year_month_number;
– test
select * fromitcast_ads
.ads_order_date
where dt='20190908' and date_type = '2';
4. Count the number of sales orders and total orders from Monday to Sunday in 2019
insert into table
itcast_ads
.ads_order_date
partition(dt='20190908')
select
'3' as date_type,
t1.weekday as date_val,
count(distinct t2.orderid), – Total orders
sum(t2.goodsprice*t2.goodsnum) – Order total
fromitcast_dw
.dim_date
t1
left join
(select * fromitcast_dw
.fact_order_goods
where dt ='20190908') t2
on t1.date_value = substr(createtime, 1, 10)
group by
t1.weekday
order by t1.weekday;
– test
select * fromitcast_ads
.ads_order_date
where dt='20190908' and date_type = '3';
5. Count the number of orders and total orders of national legal holidays, rest days and working days in 2019
insert into table
itcast_ads
.ads_order_date
partition(dt='20190908')
select
'4' as date_type,
t1.date_type as date_val,
count(distinct t2.orderid) as order_count, – Total orders
sum(t2.goodsprice*t2.goodsnum) as order_money – Order total
fromitcast_dw
.dim_date
t1
left join
(select * fromitcast_dw
.fact_order_goods
where dt <='20190908' and dt >= '20190101') t2
on t1.date_value = substr(createtime, 1, 10)
group by
t1.date_type
order by t1.date_type;
– test
select * fromitcast_ads
.ads_order_date
where dt='20190908' and date_type = '4';
Implementation flow chart: