首先,我建议您重新审视输出数据帧(DataFrame)的格式。
这种嵌套结构会使后续处理变得混乱且缓慢。
根据我使用 pandas 的经验,我总是使用扁平的数据集(一维或二维)。
无论如何,下面是一段处理此类数据的最小示例代码:
import pandas as pd
from tabulate import tabulate
def replicate_nested_df(df: pd.DataFrame, a, b, columns) -> pd.DataFrame:
    """Build a mock "nested" frame with a two-level index and one value column.

    Args:
        df: Frame with one row per feature and a single value column labelled ``0``.
        a: Value stored in the outer index level, which is named ``''``.
        b: New label for the value column (replaces ``0``).
        columns: List of feature names, one per row of ``df``; it becomes the
            unnamed inner index level.

    Returns:
        A new frame indexed by ``('', <unnamed>)`` whose value column is
        labelled ``b``. The input frame is left untouched.
    """
    # Work on a copy so the helper column is not written into the caller's frame.
    nested = df.copy()

    # add nested index: '' is looked up as a column label, while the list
    # ``columns`` is passed as an array of index values
    nested[''] = a
    nested = nested.set_index(['', columns])

    # add numeric named column
    return nested.rename(columns={0: b})
def flatten_nested_df(df: pd.DataFrame) -> pd.DataFrame:
    """Flatten a "nested" frame into a single-row, one-column-per-feature frame.

    Args:
        df: Frame with a two-level index (outer level named ``''``, inner level
            unnamed and holding the feature names) and a single value column
            whose label is the simulation parameter ``b``.

    Returns:
        A one-row frame with one column per feature plus the columns ``"a"``
        and ``"b"``; the columns axis is named ``''``.
    """
    # flatten and save simulation parameter a and b from nested structure
    b = df.columns.values.tolist()[0]
    df = df.reset_index()  # the unnamed inner level comes back as "level_1"
    a = df[''].iloc[0]  # value of the '' level in the first row

    # rename and drop columns
    df = df.rename(columns={"level_1": "feature", b: "values"})
    df = df[["feature", "values"]]

    # transpose data
    df = df.set_index("feature")
    df = df.transpose().reset_index(drop=True)
    # rename_axis returns a new frame; the result must be assigned, otherwise
    # the columns axis silently keeps the name "feature".
    df = df.rename_axis('', axis=1)

    # add simulation parameters
    df["a"] = a
    df["b"] = b
    return df
# Build two mock "nested" frames that share parameter a but differ in b.
columns = ["Collisions", "Average distance", "Minimum distance"]
df1 = replicate_nested_df(
    pd.DataFrame([[0.0073125, 3.05586, 0.86763]], columns=columns).transpose(),
    a=0.01,
    b=0.01,
    columns=columns,
)
df2 = replicate_nested_df(
    pd.DataFrame([[0.003, 3.2, 0.8]], columns=columns).transpose(),
    a=0.01,
    b=10,
    columns=columns,
)

# Flatten every nested frame into a single-row table.
df_processed = [flatten_nested_df(frame) for frame in (df1, df2)]

# Stack the rows into one frame with a fresh 0..n-1 index.
df_concat = pd.concat(df_processed).reset_index(drop=True)

print("Mockup Input:")
print("df1:\n", df1)
print("df2:\n", df2)
print("Processed and merged dataset:")
print(tabulate(df_concat, headers=df_concat.columns, tablefmt='psql'))
输入:
输出:
+----+--------------+--------------------+--------------------+------+-------+
| | Collisions | Average distance | Minimum distance | a | b |
|----+--------------+--------------------+--------------------+------+-------|
| 0 | 0.0073125 | 3.05586 | 0.86763 | 0.01 | 0.01 |
| 1 | 0.003 | 3.2 | 0.8 | 0.01 | 10 |
+----+--------------+--------------------+--------------------+------+-------+