导入和数据帧
import pandas as pd
import matplotlib as mpl
from matplotlib.patches import Patch
import matplotlib.pyplot as plt
import numpy as np
# Sample measurements: one value/error pair per (Candidate, Sample_Set).
data = {
    'Candidate': ['X', 'Y', 'Z'] * 3,
    'Sample_Set': [1, 1, 1, 2, 2, 2, 3, 3, 3],
    'Values': [20, 10, 10, 200, 101, 99, 1999, 998, 1003],
    'Error': [5, 2, 3, 30, 30, 30, 10, 10, 10],
}

# Palette shared by the plotting examples.
colors = [
    'black', 'red', 'green', 'blue', 'cyan', 'brown', 'grey',
    'goldenrod', 'lime', 'violet', 'indigo', 'coral', 'olive',
]

# Long-format frame, ordered by candidate first and sample set second.
df = pd.DataFrame(data).sort_values(by=['Candidate', 'Sample_Set'])
# display(df)
Candidate Sample_Set Values Error
0 X 1 20 5
3 X 2 200 30
6 X 3 1999 10
1 Y 1 10 2
4 Y 2 101 30
7 Y 3 998 10
2 Z 1 10 3
5 Z 2 99 30
8 Z 3 1003 10
# Wide-format table of Values: one row per Candidate, one column per Sample_Set.
vals = df.pivot(
    values='Values',
    index='Candidate',
    columns='Sample_Set',
)
# display(vals)
Sample_Set 1 2 3
Candidate
X 20 200 1999
Y 10 101 998
Z 10 99 1003
# Wide-format table of Error, laid out the same way as the Values table
# (rows are Candidate, columns are Sample_Set).
yerr = df.pivot(
    values='Error',
    index='Candidate',
    columns='Sample_Set',
)
# display(yerr)
Sample_Set 1 2 3
Candidate
X 5 30 10
Y 2 30 10
Z 3 30 10
更新答案
- 根据 OP 的评论,目标似乎是替换默认颜色,但不同组的颜色应该相同。
- 使用新答案中的代码,只需将 `colors` 列表传递给 `color` 参数,即可轻松实现这一点。
# Grouped bar chart of vals with yerr as error bars on a log-scaled y-axis;
# the custom palette is passed through the color argument.
ax = vals.plot(
    kind='bar',
    color=colors,
    yerr=yerr,
    logy=True,
    rot=0,
    figsize=(6, 5),
    ylabel='Value',
    title='Comparison',
)
# Anchor the legend's upper-left corner at axes coordinates (1, 1.02).
ax.legend(loc='upper left', bbox_to_anchor=(1, 1.02), title='Sample Set')
plt.show()
使用 OP 中引用的答案
- `for i, (gr, color) in enumerate(zip(subx, colors)):` 可以将列表中的颜色依次分配给每个组。
def grouped_barplot(df, cat, subcat, val, err):
    """Draw a grouped bar chart with error bars on a log-scaled y-axis.

    One cluster of bars is drawn per unique value of ``df[cat]``; inside each
    cluster there is one bar per unique value of ``df[subcat]``.  All bars of
    a subcategory share one color taken from a fixed palette.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data holding the four columns named below.
        NOTE(review): the rows of each subcategory are paired positionally
        with the unique ``cat`` values, so every subcategory is assumed to
        contain each category exactly once, in the same order -- confirm
        against callers.
    cat : str
        Column whose unique values become the x-axis tick labels.
    subcat : str
        Column whose unique values become the bars within each cluster; it
        is also used as the legend title.
    val : str
        Column with the bar heights; also used as the y-axis label.
    err : str
        Column with the error-bar sizes.

    Returns
    -------
    None
        The chart is drawn with pyplot and shown via ``plt.show()``.
    """
    u = df[cat].unique()
    x = np.arange(len(u))
    subx = df[subcat].unique()
    n_sub = len(subx)
    offsets = (np.arange(n_sub) - np.arange(n_sub).mean()) / (n_sub + 1.)
    # Neighbouring offsets are exactly 1 / (n_sub + 1) apart.  The closed form
    # is used instead of np.diff(offsets).mean() because the latter is NaN
    # when there is only one subcategory (np.diff returns an empty array).
    width = 1. / (n_sub + 1.)
    colors = ['black', 'red', 'green', 'blue', 'cyan', 'brown', 'grey',
              'goldenrod', 'lime', 'violet', 'indigo', 'coral', 'olive']
    for i, gr in enumerate(subx):
        dfg = df[df[subcat].eq(gr)]
        # Wrap around the palette so subcategories beyond its length are
        # still drawn instead of being silently dropped.
        color = colors[i % len(colors)]
        plt.bar(x + offsets[i], dfg[val], width=width, yerr=dfg[err],
                color=color, label=gr)
    # Title the legend with the column that actually defines the bars.
    plt.legend(title=subcat, bbox_to_anchor=(1, 1.02), loc='upper left')
    plt.yscale('log')
    plt.xlabel(cat)
    plt.ylabel(val)
    plt.xticks(x, u)
    plt.show()
# Column roles handed to grouped_barplot: cluster column, per-bar column,
# bar-height column and error-bar column.
cat, subcat, val, err = "Candidate", "Sample_Set", "Values", "Error"
grouped_barplot(df, cat=cat, subcat=subcat, val=val, err=err)
原答案
-
该请求可以实现,但不应该这样做:
- 这不是一个好的可视化实践。给定组中的条形应该都具有相同的颜色。可视化的重点是传达信息,而这样做只会让图表变得令人费解,从而违背了可视化的初衷。
- 因为条形图是按组绘制的:'X'、'Y'、'Z' 的 'Sample_Set 1','X'、'Y'、'Z' 的 'Sample_Set 2',以及 'X'、'Y'、'Z' 的 'Sample_Set 3',所以图例中只会创建 3 个标签;这意味着需要创建带有相应句柄和标签的自定义 Patch 图例。
绘图和自定义
-
rects 的顺序与 df 的行顺序不同,因此需要对 df 重新排序(先按 Sample_Set,再按 Candidate),以便 zip 将正确的颜色与正确的 rect 配对。
# Give every row of the dataframe its own color from the palette.
df['color'] = colors[:len(df)]

# Plot vals with yerr as error bars; the default legend is turned off because
# a custom one is built below.
ax = vals.plot(kind='bar', yerr=yerr, logy=True, rot=0, figsize=(6, 5),
               legend=False, ylabel='Value', title='Comparison')

# ax.patches holds the patches added to the Axes (the bars).  Scanning
# ax.get_children() for Rectangle instances would also match the Axes
# background rectangle and only worked because zip() truncated the pairing.
rects = list(ax.patches)

# The bars are not in the dataframe's row order, so the colors are re-sorted
# by Sample_Set, then Candidate, before being paired with the bars.
for rect, color in zip(rects, df.sort_values(['Sample_Set', 'Candidate'])['color']):
    rect.set_fc(color)

# One legend entry per dataframe row, labelled "<Candidate> <Sample_Set>".
handles = [
    Patch(color=row.color, label=f'{row.Candidate} {row.Sample_Set}')
    for row in df.itertuples(index=False)
]

# Anchor the legend's upper-left corner at axes coordinates (1, 1.02).
ax.legend(title='Candidate Sample_Set', handles=handles, bbox_to_anchor=(1, 1.02), loc='upper left')
plt.show()
def grouped_barplot(df, cat, subcat, val, err):
    """Draw a grouped bar chart in which every bar has its own color.

    One cluster of bars is drawn per unique value of ``df[cat]``; inside each
    cluster there is one bar per unique value of ``df[subcat]``.  Each bar is
    colored from the ``'color'`` column of its own row, and the legend gets
    one entry per dataframe row.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data holding the four columns named below plus a
        ``'color'`` column with one color per row.
        NOTE(review): the rows of each subcategory are paired positionally
        with the unique ``cat`` values, so every subcategory is assumed to
        contain each category exactly once, in the same order -- confirm
        against callers.
    cat : str
        Column whose unique values become the x-axis tick labels.
    subcat : str
        Column whose unique values become the bars within each cluster.
    val : str
        Column with the bar heights; also used as the y-axis label.
    err : str
        Column with the error-bar sizes.

    Returns
    -------
    None
        The chart is drawn with pyplot and shown via ``plt.show()``.
    """
    u = df[cat].unique()
    x = np.arange(len(u))
    subx = df[subcat].unique()
    n_sub = len(subx)
    offsets = (np.arange(n_sub) - np.arange(n_sub).mean()) / (n_sub + 1.)
    # Neighbouring offsets are exactly 1 / (n_sub + 1) apart.  The closed form
    # is used instead of np.diff(offsets).mean() because the latter is NaN
    # when there is only one subcategory (np.diff returns an empty array).
    width = 1. / (n_sub + 1.)
    for i, gr in enumerate(subx):
        dfg = df[df[subcat].eq(gr)]
        # Take the colors from the very rows being drawn.  The previous
        # df.color[n::3] slicing assumed exactly three subcategories and a
        # dataframe sorted by (cat, subcat).
        plt.bar(x + offsets[i], dfg[val], width=width, yerr=dfg[err],
                color=dfg['color'].tolist())
    # One legend entry per row, labelled "<cat value> <subcat value>".
    handles = [
        Patch(color=row['color'], label=f'{row[cat]} {row[subcat]}')
        for _, row in df.iterrows()
    ]
    plt.legend(title=f'{cat} {subcat}', handles=handles, bbox_to_anchor=(1, 1.02), loc='upper left')
    plt.yscale('log')
    plt.xlabel(cat)
    plt.ylabel(val)
    plt.xticks(x, u)
    plt.show()
# Column roles handed to grouped_barplot: cluster column, per-bar column,
# bar-height column and error-bar column.
cat, subcat, val, err = "Candidate", "Sample_Set", "Values", "Error"

# One palette color per dataframe row, stored in the 'color' column before
# the plotting call.
df['color'] = colors[:len(df)]
grouped_barplot(df, cat=cat, subcat=subcat, val=val, err=err)