【问题标题】:Transform Json into Pandas Dataframe 将 Json 转换为 Pandas 数据框
【发布时间】:2021-12-13 16:18:37
【问题描述】:

我有这样的 json,我想将其转换为带有特定列名的 pandas 数据框。

{
    "data": [
        {
            "id": 1,
            "name": "3Way Result",
            "suspended": false,
            "bookmaker": {
                "data": [
                    {
                        "id": 27802,
                        "name": "Ladbrokes",
                        "odds": {
                            "data": [
                                {
                                    "label": "1",
                                    "value": "1.61",
                                    "probability": "62.11%",
                                    "dp3": "1.610",
                                    "american": -164,
                                    "factional": null,
                                    "winning": null,
                                    "handicap": null,
                                    "total": null,
                                    "bookmaker_event_id": null,
                                    "last_update": {
                                        "date": "2021-10-01 16:41:27.000000",
                                        "timezone_type": 3,
                                        "timezone": "UTC"
                                    }
                                },
                                {
                                    "label": "X",
                                    "value": "3.90",
                                    "probability": "25.64%",
                                    "dp3": "3.900",
                                    "american": 290,
                                    "factional": null,
                                    "winning": null,
                                    "handicap": null,
                                    "total": null,
                                    "bookmaker_event_id": null,
                                    "last_update": {
                                        "date": "2021-10-01 16:41:27.000000",
                                        "timezone_type": 3,
                                        "timezone": "UTC"
                                    }
                                },
                                {
                                    "label": "2",
                                    "value": "5.20",
                                    "probability": "19.23%",
                                    "dp3": "5.200",
                                    "american": 420,
                                    "factional": null,
                                    "winning": null,
                                    "handicap": null,
                                    "total": null,
                                    "bookmaker_event_id": null,
                                    "last_update": {
                                        "date": "2021-10-01 16:41:27.000000",
                                        "timezone_type": 3,
                                        "timezone": "UTC"
                                    }
                                }
                            ]
                        }
                    },
                    {
                        "id": 70,
                        "name": "Pncl",
                        "odds": {
                            "data": [
                                {
                                    "label": "1",
                                    "value": "1.65",
                                    "probability": "60.61%",
                                    "dp3": "1.645",
                                    "american": -154,
                                    "factional": null,
                                    "winning": null,
                                    "handicap": null,
                                    "total": null,
                                    "bookmaker_event_id": null,
                                    "last_update": {
                                        "date": "2021-10-01 16:59:18.000000",
                                        "timezone_type": 3,
                                        "timezone": "UTC"
                                    }
                                },
                                {
                                    "label": "X",
                                    "value": "4.20",
                                    "probability": "23.81%",
                                    "dp3": "4.200",
                                    "american": 320,
                                    "factional": null,
                                    "winning": null,
                                    "handicap": null,
                                    "total": null,
                                    "bookmaker_event_id": null,
                                    "last_update": {
                                        "date": "2021-10-01 16:59:18.000000",
                                        "timezone_type": 3,
                                        "timezone": "UTC"
                                    }
                                },
                                {
                                    "label": "2",
                                    "value": "5.43",
                                    "probability": "18.42%",
                                    "dp3": "5.430",
                                    "american": 443,
                                    "factional": null,
                                    "winning": null,
                                    "handicap": null,
                                    "total": null,
                                    "bookmaker_event_id": null,
                                    "last_update": {
                                        "date": "2021-10-01 16:59:18.000000",
                                        "timezone_type": 3,
                                        "timezone": "UTC"
                                    }
                                }
                            ]
                        }
                    }
                ]
            }
        }
    ],
    "meta": {
        "plans": [
            {
                "name": "Football Free Plan",
                "features": "Standard",
                "request_limit": "180,60",
                "sport": "Soccer"
            }
        ],
        "sports": [
            {
                "id": 1,
                "name": "Soccer",
                "current": true
            }
        ]
    }
}

每个列名都由标签值加上博彩公司的名称组成:我想把 label 中的值与 name 中的博彩公司名称组合起来作为列名,然后把 value 中的浮点数作为数据框中的一行。

这里是预期的输出

   1_Ladbrokes  X_Ladbrokes  2_Ladbrokes       last_update_Ladbrokes  1_Pncl  X_Pncl  2_Pncl            last_update_Pncl
0         1.61          3.9          5.2  2021-10-01 16:41:27.000000    1.65     4.2    5.43  2021-10-01 16:59:18.000000

【问题讨论】:

标签: python json pandas dataframe


【解决方案1】:

使用 pd.json_normalize,分别为 value 和 last_update 创建两个子数据框,然后将它们拼接起来。

# Flatten every odds entry into one row and carry the name of the bookmaker
# that owns it along as a meta column.
out = pd.json_normalize(
    data=data['data'],
    record_path=['bookmaker', 'data', 'odds', 'data'],
    meta=[['bookmaker', 'data', 'name']],
)[['label', 'value', 'last_update.date', 'bookmaker.data.name']]

# One "<label>_<bookmaker>" entry per odds value.
df1 = out.set_index(out['label'] + '_' + out['bookmaker.data.name'])['value']

# One "last_update_<bookmaker>" entry per bookmaker.  Deduplicate on the
# index (the bookmaker) rather than with Series.drop_duplicates(), which
# compares the timestamp values: that would discard a bookmaker whose
# timestamp equals another bookmaker's, and would keep several entries with
# the same label for a bookmaker whose odds carry different timestamps.
df2 = out.set_index('bookmaker.data.name')['last_update.date'] \
         .add_prefix('last_update_')
df2 = df2[~df2.index.duplicated()]

# Stack both Series and transpose so the result is a single-row frame.
df = pd.concat([df1, df2]).to_frame().T

输出:

>>> df
  1_Ladbrokes X_Ladbrokes 2_Ladbrokes 1_Pncl X_Pncl 2_Pncl       last_update_Ladbrokes            last_update_Pncl
0        1.61        3.90        5.20   1.65   4.20   5.43  2021-10-01 16:41:27.000000  2021-10-01 16:59:18.000000

【讨论】:

    【解决方案2】:

    您可以使用json_normalize + apply 来实现它。

    def set_values(x):
        """Add the odds value and last-update date of one row as new entries.

        Args:
            x: A row (e.g. a pandas Series) with an "odds.data" entry holding
                a dict for a single odds record and a "name" entry holding
                the bookmaker name.

        Returns:
            The same row, extended in place with "<label>_<name>" set to the
            odds value and "last_update_<name>" set to the update date. Keys
            missing from the odds dict yield None instead of raising.
        """
        odds = x["odds.data"]
        name = x["name"]
        # "last_update" may be absent or null; fall back to an empty mapping
        # so the chained lookup cannot fail on None.
        last_update = odds.get("last_update") or {}
        x[f"{odds.get('label')}_{name}"] = odds.get("value")
        x[f"last_update_{name}"] = last_update.get("date")
        return x
    
    
    # One row per bookmaker, then one row per individual odds record.
    bookmakers = pd.json_normalize(data["data"], record_path=["bookmaker", "data"])
    per_odds = bookmakers.explode("odds.data")

    # set_values adds a "<label>_<name>" and a "last_update_<name>" entry to
    # each row; the helper columns are no longer needed afterwards.
    widened = per_odds.apply(set_values, axis=1)
    widened = widened.drop(columns=["odds.data", "id", "name"])

    # Each row only carries its own new entries, so fill the gaps in both
    # directions and keep the first row, which then holds every value.
    df = widened.ffill().bfill().head(1)
    
    In [39]: df
    Out[39]: 
      1_Ladbrokes 1_Pncl 2_Ladbrokes 2_Pncl X_Ladbrokes X_Pncl       last_update_Ladbrokes            last_update_Pncl
    0        1.61   1.65        5.20   5.43        3.90   4.20  2021-10-01 16:41:27.000000  2021-10-01 16:59:18.000000
    

    【讨论】:

    • 谢谢,它有效!
    猜你喜欢
    • 2019-05-14
    • 2019-07-01
    • 2020-10-31
    • 2020-09-02
    • 2020-02-29
    • 2017-03-21
    • 2021-03-01
    • 2020-12-16
    • 2021-02-09
    相关资源
    最近更新 更多