这是基于您的代码的答案!
import pandas as pd
import numpy as np
#Slightly modified your function
def tag_stats(df: pd.DataFrame, tag_list) -> pd.DataFrame:
    """Summarise each tag column and report which batch hit each extreme.

    Args:
        df: Frame containing a ``Batch_Name`` column plus one column per
            tag. The caller's frame is left untouched because
            ``set_index`` returns a new frame.
        tag_list: Names of the tag columns to summarise. Rows of the
            result follow this order.

    Returns:
        A frame with one row per tag and the columns ``tag``, ``min``,
        ``max``, ``min_batch``, ``max_batch``, ``std`` and ``mean``.
        ``min_batch`` / ``max_batch`` hold the ``Batch_Name`` label at
        which the minimum / maximum of that tag occurs.

    Raises:
        KeyError: If ``Batch_Name`` or a requested tag is not a column
            of ``df``.
    """
    # Index by batch name so that idxmin/idxmax return batch labels
    # instead of the original row labels.
    df = df.set_index('Batch_Name')
    data = {
        'tag': [],
        'min': [],
        'max': [],
        'min_batch': [],
        'max_batch': [],
        'std': [],
        'mean': [],
    }
    for tag in tag_list:
        values = df[tag]
        data['tag'].append(tag)
        data['min'].append(values.min())
        data['max'].append(values.max())
        data['min_batch'].append(values.idxmin())
        data['max_batch'].append(values.idxmax())
        data['std'].append(values.std())
        data['mean'].append(values.mean())
    # Build the frame once, after every tag has been processed.
    result = pd.DataFrame(data)
    return result
# Build a reproducible demo frame: one row per batch, one random column per tag
np.random.seed(1)
num_batches = 10
cols = ['Tag 1', 'Tag 2', 'Tag 3', 'Tag 4', 'Tag 5']
df = pd.DataFrame({
    'Batch_Name': list(map('batch_{}'.format, range(num_batches))),
    # Columns are generated in `cols` order, one randint draw of
    # num_batches integers from [1, 100) per tag.
    **{tag: np.random.randint(1, 100, num_batches) for tag in cols},
})
# Summarise every tag column and show the result
summary_df = tag_stats(df, cols)
print(summary_df)
输出:
     tag  min  max min_batch max_batch        std  mean
0  Tag 1    2   80   batch_9   batch_6  32.200759  38.0
1  Tag 2    7   85   batch_2   batch_7  28.926919  39.9
2  Tag 3   14   97   batch_9   batch_7  33.297314  63.4
3  Tag 4    1   82   batch_7   batch_9  31.060693  37.1
4  Tag 5    4   89   batch_7   batch_1  31.212711  43.3
@It_is_Chris 的评论也很棒,下面是基于该评论的另一种解法:
import pandas as pd
import numpy as np
# Build a reproducible demo frame: one row per batch, one random column per tag
np.random.seed(1)
num_batches = 10
tag_names = ['Tag 1', 'Tag 2', 'Tag 3', 'Tag 4', 'Tag 5']
df = pd.DataFrame({
    'Batch_Name': list(map('batch_{}'.format, range(num_batches))),
    # Columns are generated in `tag_names` order, one randint draw of
    # num_batches integers from [1, 100) per tag.
    **{tag: np.random.randint(1, 100, num_batches) for tag in tag_names},
})

# Reshape to long form and index by Batch_Name:
#   index    | tag   | tag_value
#   ---------+-------+----------
#   batch_0  | Tag 1 | 38
#   batch_1  | Tag 1 | 13
#   batch_2  | Tag 1 | 73
melted = df.melt(id_vars='Batch_Name', var_name='tag', value_name='tag_value')
long_df = melted.set_index('Batch_Name')

# One output column per statistic. The idxmax/idxmin entries yield the
# Batch_Name label because Batch_Name is the index of long_df.
aggregations = {
    'max_value': ('tag_value', 'max'),
    'max_batch': ('tag_value', 'idxmax'),
    'min_value': ('tag_value', 'min'),
    'min_batch': ('tag_value', 'idxmin'),
    'mean_value': ('tag_value', 'mean'),
    'std_value': ('tag_value', 'std'),
}
summary_df = long_df.groupby('tag').agg(**aggregations).reset_index()
# Bare expression: displays the frame in a notebook/REPL session.
summary_df
输出:
     tag  max_value max_batch  min_value min_batch  mean_value  std_value
0  Tag 1         80   batch_6          2   batch_9        38.0  32.200759
1  Tag 2         85   batch_7          7   batch_2        39.9  28.926919
2  Tag 3         97   batch_7         14   batch_9        63.4  33.297314
3  Tag 4         82   batch_9          1   batch_7        37.1  31.060693
4  Tag 5         89   batch_1          4   batch_7        43.3  31.212711