【问题标题】:Parse field of json data in Snowflake table insert multiple rows into new Snowflake table(解析 Snowflake 表中的 JSON 字段,并将其作为多行插入到新的 Snowflake 表中)
【发布时间】:2021-08-17 15:57:47
【问题描述】:

我在 Snowflake 表 “tbl” 中有一个名为 “reactions” 的字段/列。该表还包含其他几列数据和许多记录。“reactions” 字段是一个 JSON 数组,其中可能包含多个反应对象(以 “name” 标识),每个对象都列出了做出该反应的用户 “users”(参见下面的示例数组)。我想把这个字段解析到一张新表中,使每个做出反应的用户 ID 各占一条记录,并带上反应的 “name” 以及源表 “tbl” 中的其他一些数据列。我尝试过展平(flatten)数据并使用 JSON 解析,但没能得到想要的输出。非常感谢您提供的任何支持或指导!

“reactions” 字段值示例(JSON 数组):

[{
    "name": "wave",
    "users": ["UM6G1DGBG", "UUW2KMQF8", "UG47U9K9N", "U01KCT0KJHL", "U7GRE4E9M", "UU1V5U3B4", "U01S7RRG21Y", "U013J1D5NFQ", "U020AS49MFA", "U010F5EDDQV", "U01ETTJHY4F"],
    "count": 11
}, {
    "name": "wave::skin-tone-3",
    "users": ["UUBFUS220"],
    "count": 1
}, {
    "name": "fire",
    "users": ["U01ETTJHY4F"],
    "count": 1
}]

所需的输出将如下所示:

OtherData1 UserID ReactionName
Attribute1 UM6G1DGBG wave
Attribute1 UUW2KMQF8 wave
Attribute1 UG47U9K9N wave
... ... ...
Attribute1 U01ETTJHY4F fire

【问题讨论】:

    标签: json parsing snowflake-cloud-data-platform


    【解决方案1】:

    您可以使用以下方法创建您想要的结果

    -- Expand every reaction object, then every user id inside it, so the
    -- result has one row per (reaction, user) pair.
    WITH tbl AS (
        -- Single inline sample row standing in for the real source table:
        -- one ordinary column plus the reactions JSON parsed into a VARIANT.
        SELECT
            'foo' AS other_data,
            PARSE_JSON('[{
        "name": "wave",
        "users": ["UM6G1DGBG", "UUW2KMQF8", "UG47U9K9N", "U01KCT0KJHL", "U7GRE4E9M", "UU1V5U3B4", "U01S7RRG21Y", "U013J1D5NFQ", "U020AS49MFA", "U010F5EDDQV", "U01ETTJHY4F"],
        "count": 11
    }, {
        "name": "wave::skin-tone-3",
        "users": ["UUBFUS220"],
        "count": 1
    }, {
        "name": "fire",
        "users": ["U01ETTJHY4F"],
        "count": 1
    }]') AS users
    )

    SELECT
        other_data,        -- carried over unchanged from the source row
        user_ids.value,    -- one user id per output row (still a VARIANT)
        u.value:name,      -- name of the reaction this user id belongs to
        u.value            -- the whole reaction object, kept for reference
    FROM
        tbl,
        -- First pass: one row per reaction object in the top-level array.
        LATERAL FLATTEN(input => users) AS u,
        -- Second pass: one row per entry of that reaction's "users" array.
        LATERAL FLATTEN(input => u.value:users) AS user_ids
    ;
    
    
    
    OTHER_DATA  VALUE   U.VALUE:NAME    VALUE
    foo "UM6G1DGBG" "wave"  {    "count": 11,    "name": "wave",    "users": [      "UM6G1DGBG",      "UUW2KMQF8",      "UG47U9K9N",      "U01KCT0KJHL",      "U7GRE4E9M",      "UU1V5U3B4",      "U01S7RRG21Y",      "U013J1D5NFQ",      "U020AS49MFA",      "U010F5EDDQV",      "U01ETTJHY4F"    ]  }
    foo "UUW2KMQF8" "wave"  {    "count": 11,    "name": "wave",    "users": [      "UM6G1DGBG",      "UUW2KMQF8",      "UG47U9K9N",      "U01KCT0KJHL",      "U7GRE4E9M",      "UU1V5U3B4",      "U01S7RRG21Y",      "U013J1D5NFQ",      "U020AS49MFA",      "U010F5EDDQV",      "U01ETTJHY4F"    ]  }
    foo "UG47U9K9N" "wave"  {    "count": 11,    "name": "wave",    "users": [      "UM6G1DGBG",      "UUW2KMQF8",      "UG47U9K9N",      "U01KCT0KJHL",      "U7GRE4E9M",      "UU1V5U3B4",      "U01S7RRG21Y",      "U013J1D5NFQ",      "U020AS49MFA",      "U010F5EDDQV",      "U01ETTJHY4F"    ]  }
    foo "U01KCT0KJHL"   "wave"  {    "count": 11,    "name": "wave",    "users": [      "UM6G1DGBG",      "UUW2KMQF8",      "UG47U9K9N",      "U01KCT0KJHL",      "U7GRE4E9M",      "UU1V5U3B4",      "U01S7RRG21Y",      "U013J1D5NFQ",      "U020AS49MFA",      "U010F5EDDQV",      "U01ETTJHY4F"    ]  }
    foo "U7GRE4E9M" "wave"  {    "count": 11,    "name": "wave",    "users": [      "UM6G1DGBG",      "UUW2KMQF8",      "UG47U9K9N",      "U01KCT0KJHL",      "U7GRE4E9M",      "UU1V5U3B4",      "U01S7RRG21Y",      "U013J1D5NFQ",      "U020AS49MFA",      "U010F5EDDQV",      "U01ETTJHY4F"    ]  }
    foo "UU1V5U3B4" "wave"  {    "count": 11,    "name": "wave",    "users": [      "UM6G1DGBG",      "UUW2KMQF8",      "UG47U9K9N",      "U01KCT0KJHL",      "U7GRE4E9M",      "UU1V5U3B4",      "U01S7RRG21Y",      "U013J1D5NFQ",      "U020AS49MFA",      "U010F5EDDQV",      "U01ETTJHY4F"    ]  }
    foo "U01S7RRG21Y"   "wave"  {    "count": 11,    "name": "wave",    "users": [      "UM6G1DGBG",      "UUW2KMQF8",      "UG47U9K9N",      "U01KCT0KJHL",      "U7GRE4E9M",      "UU1V5U3B4",      "U01S7RRG21Y",      "U013J1D5NFQ",      "U020AS49MFA",      "U010F5EDDQV",      "U01ETTJHY4F"    ]  }
    foo "U013J1D5NFQ"   "wave"  {    "count": 11,    "name": "wave",    "users": [      "UM6G1DGBG",      "UUW2KMQF8",      "UG47U9K9N",      "U01KCT0KJHL",      "U7GRE4E9M",      "UU1V5U3B4",      "U01S7RRG21Y",      "U013J1D5NFQ",      "U020AS49MFA",      "U010F5EDDQV",      "U01ETTJHY4F"    ]  }
    foo "U020AS49MFA"   "wave"  {    "count": 11,    "name": "wave",    "users": [      "UM6G1DGBG",      "UUW2KMQF8",      "UG47U9K9N",      "U01KCT0KJHL",      "U7GRE4E9M",      "UU1V5U3B4",      "U01S7RRG21Y",      "U013J1D5NFQ",      "U020AS49MFA",      "U010F5EDDQV",      "U01ETTJHY4F"    ]  }
    foo "U010F5EDDQV"   "wave"  {    "count": 11,    "name": "wave",    "users": [      "UM6G1DGBG",      "UUW2KMQF8",      "UG47U9K9N",      "U01KCT0KJHL",      "U7GRE4E9M",      "UU1V5U3B4",      "U01S7RRG21Y",      "U013J1D5NFQ",      "U020AS49MFA",      "U010F5EDDQV",      "U01ETTJHY4F"    ]  }
    foo "U01ETTJHY4F"   "wave"  {    "count": 11,    "name": "wave",    "users": [      "UM6G1DGBG",      "UUW2KMQF8",      "UG47U9K9N",      "U01KCT0KJHL",      "U7GRE4E9M",      "UU1V5U3B4",      "U01S7RRG21Y",      "U013J1D5NFQ",      "U020AS49MFA",      "U010F5EDDQV",      "U01ETTJHY4F"    ]  }
    foo "UUBFUS220" "wave::skin-tone-3" {    "count": 1,    "name": "wave::skin-tone-3",    "users": [      "UUBFUS220"    ]  }
    foo "U01ETTJHY4F"   "fire"  {    "count": 1,    "name": "fire",    "users": [      "U01ETTJHY4F"    ]  }
    
    So to create the table with one row per user ID, do something like `create table flattened_tbl(other_data, user_id, reaction_name) as select other_data, user_ids.value::string, u.value:name::string from tbl, lateral flatten(users) u, lateral flatten(u.value:users) user_ids;`
    

    【讨论】:

    • 嗨@Nat Taylor 感谢您的回复。我相信这会让我得到每个名称的记录,而不是每个用户 ID 的记录(例如用户 ID 值“U01ETTJHY4F”)。我可能最初沟通得不够好,所以我用所需的输出表修改了帖子描述,以更好地传达我的目标。
    • 我认为我编辑了答案以反映您的用例。
    猜你喜欢
    • 2021-04-11
    • 2021-08-01
    • 2020-07-29
    • 2019-03-27
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多