【发布时间】:2021-02-02 17:58:21
【问题描述】:
所以当我将 CSV 文件添加到我的 HQL 代码并在 HDFS 上运行它时,我遇到了这个问题。 我发现在插入数据时,它在分区部分中得到空值,并且某些列被删除我尝试了许多不同的方法来插入数据,但我仍然得到这个奇怪的符号和丢失的列,就像它无法读取 CSV 文件一样, 这是一张图片 enter image description here 这是代码`
CREATE database covid_db;
use covid_db;
SET hive.exec.dynamic.partition = true;
SET hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=500;
set hive.exec.max.dynamic.partitions.pernode=500;
CREATE TABLE IF NOT EXISTS covid_db.covid_staging
(
Country STRING,
Total_Cases DOUBLE,
New_Cases DOUBLE,
Total_Deaths DOUBLE,
New_Deaths DOUBLE,
Total_Recovered DOUBLE,
Active_Cases DOUBLE,
Serious DOUBLE,
Tot_Cases DOUBLE,
Deaths DOUBLE,
Total_Tests DOUBLE,
Tests DOUBLE,
CASES_per_Test DOUBLE,
Death_in_Closed_Cases STRING,
Rank_by_Testing_rate DOUBLE,
Rank_by_Death_rate DOUBLE,
Rank_by_Cases_rate DOUBLE,
Rank_by_Death_of_Closed_Cases DOUBLE
)
ROW FORMAT DELIMITED FIELDS TERMINATED by ','
STORED as TEXTFILE
LOCATION '/user/cloudera/ds/COVID_HDFS_LZ'
tblproperties ("skip.header.line.count"="1", "serialization.null.format" = "''");
CREATE EXTERNAL TABLE IF NOT EXISTS covid_db.covid_ds_partitioned
(
Country STRING,
Total_Cases DOUBLE,
New_Cases DOUBLE,
Total_Deaths DOUBLE,
New_Deaths DOUBLE,
Total_Recovered DOUBLE,
Active_Cases DOUBLE,
Serious DOUBLE,
Tot_Cases DOUBLE,
Deaths DOUBLE,
Total_Tests DOUBLE,
Tests DOUBLE,
CASES_per_Test DOUBLE,
Death_in_Closed_Cases STRING,
Rank_by_Testing_rate DOUBLE,
Rank_by_Death_rate DOUBLE,
Rank_by_Cases_rate DOUBLE,
Rank_by_Death_of_Closed_Cases DOUBLE
)
PARTITIONED BY (COUNTRY_NAME STRING)
STORED as TEXTFILE
LOCATION '/user/cloudera/ds/COVID_HDFS_PARTITIONED';
FROM
covid_db.covid_staging
INSERT INTO TABLE covid_db.covid_ds_partitioned PARTITION(COUNTRY_NAME)
SELECT *,Country WHERE Country is not null;
CREATE EXTERNAL TABLE covid_db.covid_final_output
(
TOP_DEATH STRING,
TOP_TEST STRING
)
PARTITIONED BY (COUNTRY_NAME STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED by ','
STORED as TEXTFILE
LOCATION '/user/cloudera/ds/COVID_FINAL_OUTPUT';
`
【问题讨论】: