import pandas as pd
# Single-row example: repeated column labels ('C' three times, 'D' twice) hold
# alternative values for the same field.
df = pd.DataFrame([[1, 0, 'x', 'y', 'z', 8, 9]], columns=list('ABCCCDD'))
# Transposing moves the column labels into the index, so grouping on index
# level 0 gathers the values that share a label; sort=False keeps the groups in
# the original column order.  Each group is converted to a plain list because
# MultiIndex.from_product otherwise takes every level name from the Series'
# name (0 for all groups here), and to_frame rejects duplicate column labels.
result = pd.MultiIndex.from_product(
    [grp.tolist() for _, grp in df.T.groupby(level=0, sort=False)[0]]
).to_frame(index=False)
# One output row per combination of the grouped values.
print(result)
输出：
0 1 2 3
0 1 0 x 8
1 1 0 x 9
2 1 0 y 8
3 1 0 y 9
4 1 0 z 8
5 1 0 z 9
如果您的 DataFrame 有多于一行:
import numpy as np
import pandas as pd
def row_to_arrays(row, idx):
    """
    Cut one row into its non-empty component arrays.

    row is a tuple whose first item is the row index (it is skipped);
    idx holds the positions, relative to the remaining items, at which
    the row is cut.

    Returns a list of NumPy arrays with empty strings removed and with
    segments that end up empty dropped entirely.

    NOTE: np.split converts the tuple to a single NumPy array first, so a
    row mixing numbers and text is expected to come back as strings.
    """
    # Skip position 0: it carries the index, not data
    values = row[1:]
    pieces = []
    for segment in np.split(values, idx):
        # Drop the '' placeholders inside this segment
        kept = segment[segment != '']
        # Keep the segment only if something is left
        if len(kept):
            pieces.append(kept)
    return pieces
def arrays_to_dataframe(arrays):
    """
    Convert a list of arrays to the DataFrame of their Cartesian product.

    arrays is a sequence of array-likes; each one supplies the candidate
    values for one output column.

    Returns a DataFrame with one row per combination of values.  When
    arrays is empty an empty DataFrame is returned.
    """
    arrays = list(arrays)
    if not arrays:
        # MultiIndex.from_product raises ValueError when given no levels;
        # an empty frame lets callers concatenate the result safely.
        return pd.DataFrame()
    return pd.MultiIndex.from_product(arrays).to_frame(index=False)
def df_to_row_product(df):
    """
    Expand every row of df into the product of its repeated-label columns.

    Columns sharing a label are treated as alternative values for one
    field.  Each row is cut at the first position of every distinct label
    (except the first label), the pieces are combined with
    arrays_to_dataframe, and the per-row frames are stacked.

    Returns a DataFrame with a fresh 0..n-1 index in which missing cells
    are filled with ''.  A DataFrame without rows yields an empty
    DataFrame.
    """
    # First position of each distinct column label.  sort=False keeps the
    # labels in their original left-to-right order; the default sorted
    # grouping would scramble the cut points whenever the column labels are
    # not already in sorted order.
    first_positions = (
        pd.DataFrame(df.columns)
        .groupby(0, sort=False)[0]
        .agg(lambda labels: labels.index[0])
    )
    # The first label starts at position 0, which is not a cut point.
    idx = first_positions.iloc[1:]
    data = [arrays_to_dataframe(row_to_arrays(row, idx))
            for row in df.itertuples()]
    if not data:
        # pd.concat raises ValueError when handed an empty list
        return pd.DataFrame()
    # Rows that produced fewer columns leave NaN after concat; show them as ''
    result = pd.concat(data, ignore_index=True).fillna('')
    return result
# Two-row example; the second row pads its unused 'C'/'D' slots with ''.
df = pd.DataFrame(
    data=[
        [1, 0, 'x', 'y', 'z', 8, 9],
        [2, 4, 'x', 'b', '', '', ''],
    ],
    columns=list('ABCCCDD'),
)
# Show the per-row products stacked into one frame
print(df_to_row_product(df))
输出：
0 1 2 3
0 1 0 x 8
1 1 0 x 9
2 1 0 y 8
3 1 0 y 9
4 1 0 z 8
5 1 0 z 9
6 2 4 x
7 2 4 b