Chapter VII_ Partition table [single partition, multi partition, dynamic partition, modified partition]

1. What is zoning
1. Partitions in hive are subdirectories (for data files) (table = directory, partition = directory)

2. Why create partitions (benefits of partitions)
1. Data isolation & Query Optimization

3. Single partition
-- Single partition
-- Create partition table(Single partition)
create table home.ods_front_log_dd (
log_id string comment 'journal id',
log_type string comment 'Log type',
event_key string comment 'User behavior representation')
comment 'Front end log table(Sky level)'
PARTITIONED BY (ds string)
row format delimited fields terminated by '\t'
lines terminated by '\n'
stored as orc;

-- Write data to partition table(Static partition)
insert overwrite table home.ods_front_log_dd partition(ds = '2022-01-01')

select 1 as log_id,'Exhibition' as log_type,'Home page display' as event_key union all
select 2 as log_id,'click' as log_type,'Home page click' as event_key union all
select 3 as log_id,'Exhibition' as log_type,'Landing page display' as event_key union all
select 4 as log_id,'click' as log_type,'Home page result page' as event_key union all
select 5 as log_id,'click' as log_type,'Click 1 on the home page' as event_key union all
select 6 as log_id,'click' as log_type,'Click 2 on the home page' as event_key;

-- View data directory
/user/hive/warehouse/home.db/ods_front_log_dd/ds=2022-01-01/000000_0
4. Multi partition
-- Multi partition
-- Create partition table(Multiple partitions)
create table home.ods_front_log_hour (
log_id string comment 'journal id',
log_type string comment 'Log type',
event_key string comment 'User behavior representation')
comment 'Front end log table(hour)'
PARTITIONED BY (ds string,hour string)
row format delimited fields terminated by '\t'
lines terminated by '\n'
stored as orc;

-- Write data to partition table(Multiple partitions)
insert overwrite table home.ods_front_log_hour partition(ds = '2022-01-01' ,hour = '00')

select 1 as log_id,'Exhibition' as log_type,'Home page display' as event_key union all
select 2 as log_id,'click' as log_type,'Home page click' as event_key union all
select 3 as log_id,'Exhibition' as log_type,'Landing page display' as event_key union all
select 4 as log_id,'click' as log_type,'Home page result page' as event_key union all
select 5 as log_id,'click' as log_type,'Click 1 on the home page' as event_key union all
select 6 as log_id,'click' as log_type,'Click 2 on the home page' as event_key;

-- Data directory
/user/hive/warehouse/home.db/ods_front_log_hour/ds=2022-01-01/hour=00/000000_0
5. Dynamic zoning
-- 1.  What is dynamic partition & static partition
1. Static partition: when writing data to the partition table, the value of the partition field must be written dead (a fixed value)
2. Dynamic partition: when writing data to the partition table, the value of the partition field is automatically determined according to the value of the partition field in the data

-- 2.  Parameter setting
-- 1. Enable the dynamic partition function (true by default, enabled)
set hive.exec.dynamic.partition=true;

-- 2. Set non strict mode (strict mode by default)
--Strict: strict mode (at least one partition field must be specified as a static partition)
--nonstrict: non strict mode (all partition fields can use dynamic partition)
set hive.exec.dynamic.partition.mode=nonstrict;

-- 3. Set the maximum number of partitions (the default is 1000)
--Indicates the maximum number of dynamic partitions that can be created by each maper or reducer. The default is 100. If it is exceeded, an error will be reported
set hive.exec.max.dynamic.partitions=2000;

-- 4. Set the maximum number of partitions allowed to be created by dynamic partition statements (the default is 100)
--Indicates the maximum number of dynamic partitions that can be created by a dynamic partition statement, exceeding which an error is reported
--Determined by the data to be inserted into the table
set hive.exec.max.dynamic.partitions.pernode=400;

-- 5. How many HDFS files can be created in the whole MR Job (100000 by default)
set hive.exec.max.created.files=100000;

-- 6. Whether to throw an exception when an empty partition is generated (false by default)
--Generally, no setting is required
set hive.error.on.empty.partition=false;
-- Write data to partition table(Dynamic partition)
-- The last field of the query is the partition field

set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=2000;
set hive.exec.max.dynamic.partitions.pernode=400;
set hive.exec.max.created.files=100000;

insert overwrite table home.ods_front_log_dd partition(ds)

select 1 as log_id,'Exhibition' as log_type,'Home page display' as event_key,'2022-01-02' as ds union all
select 2 as log_id,'click' as log_type,'Home page click' as event_key,'2022-01-02' as ds union all
select 3 as log_id,'Exhibition' as log_type,'Landing page display' as event_key,'2022-01-02' as ds union all
select 4 as log_id,'click' as log_type,'Home page result page' as event_key,'2022-01-03' as ds union all
select 5 as log_id,'click' as log_type,'Click 1 on the home page' as event_key,'2022-01-03' as ds union all
select 6 as log_id,'click' as log_type,'Click 2 on the home page' as event_key,'2022-01-03' as ds;

-- Data directory
drwxr-xr-x    root    supergroup    0 B    Jan 29 15:03    0    0 B    ds=2022-01-01
drwxr-xr-x    root    supergroup    0 B    Jan 29 16:02    0    0 B    ds=2022-01-02
drwxr-xr-x    root    supergroup    0 B    Jan 29 16:02    0    0 B    ds=2022-01-03

6. View, add and delete partitions
--1.  See how many partitions the partition table has
show partitions home.ods_front_log_dd;
partition
ds=2022-01-01
ds=2022-01-02
ds=2022-01-03

--2. Add partition (create empty directory)
alter table home.ods_front_log_dd add partition(ds='001'); -- Create a single partition
alter table home.ods_front_log_dd add partition(ds='002') partition(ds='003'); -- Create multiple partitions

--3. delete a partition
alter table home.ods_front_log_dd drop partition(ds='001'); -- Create a single partition
alter table home.ods_front_log_dd drop partition(ds='002') ,partition(ds='003'); -- Create multiple partitions








 

Keywords: hive

Added by Fife Club on Sun, 30 Jan 2022 07:59:34 +0200