【问题标题】:使用 sparksql 将 json 文件数据插入 sql 表(insert json file data to sql table using sparksql)
【发布时间】:2020-03-20 10:03:50
【问题描述】:

我正在尝试使用 sparksql 将 json 文件数据插入到 sql 表中。我的 json 文件示例如下:

{
    "id": "value_string",
    "aggregate_id": "value_string",
    "type": "value_string",
    "timestamp": "value_string",
    "data": {
        "customer_id": "value_string",
        "name": "value_string"
    }
}

想使用 spark 将其插入 sql 表,我尝试像下面这样创建模型类来反序列化,但没有成功:

     // Model for the nested "data" object in the sample JSON.
     public class DataOfPerson
        {
            public string name { get; set; }
            public string birthdate { get; set; }
            public string customer_id { get; set; }

        }
        // Model for the JSON root. Note: the sample JSON's root object IS the
        // person — there is no outer "person" wrapper key.
        public class Person
        {
            public string id { get; set; }
            public string aggregate_id { get; set; }
            public string type { get; set; }
            public string timestamp { get; set; }
            // The JSON key is "data" and it holds a single object, not an
            // array, so the property must be named "data" and typed as
            // DataOfPerson (not List<DataOfPerson> named "dataOfPerson").
            public DataOfPerson data { get; set; }
        }
        // Kept for backward compatibility; not needed for the sample JSON,
        // since the root deserializes directly to Person.
        public class RootObject
        {
            public Person person { get; set; }
        }
        // sqlContext.read.json(...) is Spark/Scala and returns a DataFrame —
        // it cannot be passed to Newtonsoft's JsonConvert. To deserialize in
        // C#, read the raw JSON text instead (original line also had a
        // missing closing parenthesis):
        var root = JsonConvert.DeserializeObject<Person>(File.ReadAllText("events.json"));

【问题讨论】:

    标签: sql json dataframe apache-spark-sql insertion


    【解决方案1】:

    /**
     * Flattens the first level of struct columns in a DataFrame: each
     * top-level `StructType` column is replaced by one column per struct
     * field, named `parent.child` (e.g. `data.customer_id`).
     *
     * @param spark    unused; kept only so the existing signature (and all
     *                 callers) remain unchanged. The original declared an
     *                 `spark.emptyDataFrame` that was never read.
     * @param nestedDf DataFrame whose struct columns should be flattened
     * @return DataFrame with flat columns first (in original order),
     *         followed by the expanded struct-field columns
     */
    def flattenDataFrame(spark: SparkSession, nestedDf: DataFrame): DataFrame = {
      // dtypes yields (columnName, typeString); struct columns render their
      // type string as "StructType(...)", which the original code also
      // relied on via contains("Struct").
      val (structCols, flatCols) =
        nestedDf.dtypes.partition { case (_, dataType) => dataType.contains("Struct") }

      // Expand each struct column one level: "data" -> "data.customer_id", ...
      val expandedCols = structCols.flatMap { case (name, _) =>
        nestedDf.select(name + ".*").columns.map(child => name + "." + child)
      }

      // Same column order as the original: flat columns, then expanded ones.
      val allColumns = flatCols.map(_._1) ++ expandedCols
      nestedDf.select(allColumns.map(col): _*)
    }

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2021-02-19
      • 1970-01-01
      • 2014-04-27
      • 1970-01-01
      • 2019-09-12
      • 1970-01-01
      • 1970-01-01
      • 2018-03-25
      相关资源
      最近更新 更多