【发布时间】:2019-12-24 23:28:56
【问题描述】:
我正在尝试为以下 JSON 创建 StructType Schema
{
"countries":{
"country":[
{
"area":9596960,
"cities":{
},
"name":"China",
"population":1210004992
},
{
"area":3287590,
"cities":{
},
"name":"India",
"population":952107712
},
{
"area":9372610,
"cities":{
"city":[
{
"name":"New York",
"population":7380906
},
{
"name":"Los Angeles",
"population":3553638
},
{
"name":"Chicago",
"population":2721547
},
{
"name":"Detroit",
"population":1000272
}
]
},
"name":"United States",
"population":266476272
},
{
"area":1919440,
"cities":{
"city":[
{
"name":"Jakarta",
"population":8259266
},
{
"name":"Surabaya",
"population":2483871
},
{
"name":"Bandung",
"population":2058649
},
{
"name":"Medan",
"population":1730752
},
{
"name":"Semarang",
"population":1250971
},
{
"name":"Palembang",
"population":1144279
}
]
},
"name":"Indonesia",
"population":206611600
}
]
}
}
我正在执行以下代码来获取所有国家/地区的名称
DataTypes.createStructField("countries", (new StructType()).add(DataTypes.createStructField("country",
(new StructType()).add(DataTypes.createStructField("name", DataTypes.StringType, true)), true)), true)
但是当我运行下面的代码来获取所有国家/地区名称时:
Dataset<Row> namesDF = spark.sql("SELECT countries FROM country");
namesDF.show();
得到的结果全是 null。请问应该如何使用 StructType 解析 JSON 字段来获取这些值?
我目前的做法是否正确?我想从上面的 JSON 中获取国家/地区名称。
更新:
代码:
/**
 * Schema passed to the JSON reader: a top-level {@code countries} struct holding a
 * {@code country} array whose struct elements expose only the {@code name} string.
 * Every field, and the array's elements, are declared nullable.
 */
static final StructType SCHEMA = DataTypes.createStructType(new StructField[] {
    DataTypes.createStructField(
        "countries",
        DataTypes.createStructType(new StructField[] {
            DataTypes.createStructField(
                "country",
                // Array of country structs; the trailing 'true' is containsNull.
                DataTypes.createArrayType(
                    DataTypes.createStructType(new StructField[] {
                        DataTypes.createStructField("name", DataTypes.StringType, true)
                    }),
                    true),
                true)
        }),
        true)
});
}
入口点
Dataset<Row> ds = spark.read().schema(Jsonreadystructure.SCHEMA)
.json(context.getProperty(GlobalConstants.ReadyJsonFile));
ds.printSchema();
ds.createOrReplaceTempView("country_data");
ds.sqlContext().sql("SELECT country.name FROM country_data lateral view explode(countries.country) t as country").show(false);
输出
root
|-- countries: struct (nullable = true)
| |-- country: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- name: string (nullable = true)
+----+
|name|
+----+
+----+
为什么 name 列显示为空?我使用的是 Spark 2.4.4。
Schema 推断(schema discovery)的结果:
root
|-- countries: struct (nullable = true)
| |-- country: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- area: double (nullable = true)
| | | |-- cities: struct (nullable = true)
| | | | |-- city: string (nullable = true)
| | | |-- name: string (nullable = true)
| | | |-- population: long (nullable = true)
【问题讨论】:
标签: java apache-spark