【问题标题】:How can I highlight the largest value(s) in df.plot.barh?如何突出显示 df.plot.barh 中的最大值?
【发布时间】:2022-01-11 16:53:17
【问题描述】:

我有一个堆积条形图,它使用颜色显示 NBA 球队中年龄组的分布,代码如下所示:

import matplotlib.pyplot as mpl
import matplotlib.cm as mcm
import pandas as pd
import numpy as np
from typing import List, Tuple
def read_to_df(file_path: str) -> pd.DataFrame:
    """Load the Excel workbook at *file_path* into a DataFrame.

    The first column of the sheet is used as the row index.
    """
    frame = pd.read_excel(file_path, index_col=0)
    return frame
def color_to_hex(color: Tuple[float]) -> str:
    """Convert a colour tuple of floats into a ``#rrggbb`` hex string.

    The last component of *color* (the alpha channel of an RGBA tuple) is
    dropped. Each remaining component is scaled by 255, reduced by one if
    the scaled value is not ``<= 255``, and rounded to the nearest integer.
    """
    channels = []
    for component in color[:-1]:
        scaled = component * 255
        if not (scaled <= 255):
            # Same adjustment as the original for out-of-range components.
            scaled = scaled - 1
        channels.append(int(round(scaled)))
    return "#%02x%02x%02x" % tuple(channels)
def cmap_to_colors(cmap: str,
                   amount: int) -> List[str]:
    """Sample *amount* evenly spaced colours from a named colormap.

    Args:
        cmap: Name of a registered matplotlib colormap (e.g. ``"viridis"``).
        amount: Number of colours to sample across the range 0..1.

    Returns:
        A list of ``#rrggbb`` strings, one per sampled position.
    """
    # matplotlib.cm.get_cmap was deprecated in 3.7 and removed in 3.9;
    # pyplot.get_cmap (``mpl`` is the pyplot alias here) is still available.
    colormap = mpl.get_cmap(cmap)
    return [color_to_hex(colormap(position))
            for position in np.linspace(0, 1, amount)]
def main() -> None:
    """Plot the age-group columns as a stacked horizontal bar chart.

    Reads ``age_dist_median_six.xlsx``, drops the summary columns, plots the
    remaining columns with six viridis colours and saves the figure to
    ``stacked_six_viridis.png``.
    """
    frame = read_to_df("age_dist_median_six.xlsx")
    age_groups = frame.drop(["median", "youngest", "oldest"], axis=1)
    # Reverse the row order (this is a reversal, not a transpose);
    # presumably so the first row ends up at the top of the chart.
    age_groups = age_groups.iloc[::-1]
    # One hex colour per age-group column, sampled from the colormap.
    palette = cmap_to_colors("viridis", 6)
    axes = age_groups.plot.barh(
        stacked=True,
        color=palette,
        width=0.95,
        xticks=np.linspace(0, 100, 11),
        figsize=(10, 15),
    )
    axes.legend(bbox_to_anchor=(1, 1))
    axes.margins(x=0)
    mpl.savefig("stacked_six_viridis.png")


if __name__ == "__main__":
    main()

我对结果总体上感到满意,但我还想突出显示最大的条形:我希望数值最大的那个条形(如果有多个条形并列最大,则是这些条形)保持原有颜色,而其他所有条形以灰色显示。我该怎么做?条形图现在是这样的: 用于图表的 .xlsx 文件可在此处查看:https://send-anywhere.com/web/downloads/RJN1IIPS

【问题讨论】:

    标签: python pandas matplotlib visualization


    【解决方案1】:

    一个想法是遍历生成的条形图,并更改它们的透明度。

    下面的示例代码使用 plt 作为 matplotlib.pyplot 的别名,使代码更容易与教程和网络上的示例进行比较。另请注意,pandas 的绘图函数通常返回一个 ax(matplotlib 的 Axes 对象)。

    import matplotlib.pyplot as plt
    from matplotlib.ticker import PercentFormatter
    import pandas as pd
    
    # Load the data; the first column of the sheet becomes the row index.
    df = pd.read_excel("age_dist_median_six.xlsx", index_col=0)

    # Keep only the age-group columns by dropping the summary columns.
    df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)

    ax = df_age_only.plot.barh(cmap='viridis', stacked=True, edgecolor='black', width=1, clip_on=False, figsize=(12, 6))
    ax.legend(bbox_to_anchor=(1.01, 1), loc='upper left')
    ax.invert_yaxis()
    ax.margins(x=0, y=0)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.tick_params(axis='y', length=0)
    ax.xaxis.set_major_formatter(PercentFormatter(100))

    # Largest value of every row. DataFrame.max skips NaN by default, so this
    # matches a NaN-aware maximum without needing numpy (which this snippet
    # never imports).
    row_max = df_age_only.max(axis=1)
    # ax.containers holds one group of bars per column; within a group the
    # bars are taken in row order, so they are paired with row_max by position.
    for bar_group in ax.containers:
        for bar, max_val in zip(bar_group, row_max):
            # The small tolerance keeps bars that tie for the maximum highlighted.
            if bar.get_width() < max_val - 1e-6:
                bar.set_alpha(0.4)
                # Alternative: bar.set_facecolor('#DDDDDD') greys the bar out instead.
    plt.tight_layout()
    plt.show()
    

    【讨论】:

    • 太棒了,非常感谢!
    【解决方案2】:

    这是一种直接使用 matplotlib 的 barh 函数来做你想做的事情的方法。这个想法是迭代地设置水平堆叠条并同时分配适当的颜色。以下是您提供的代码的改编版,用于执行我上面描述的操作:

    import matplotlib.pyplot as mpl
    import matplotlib.cm as mcm
    import pandas as pd
    import numpy as np
    from typing import List, Tuple
    
    def read_to_df(file_path: str) -> pd.DataFrame:
        """Read the Excel file at *file_path*, indexing rows by its first column."""
        table = pd.read_excel(file_path, index_col=0)
        return table
    def color_to_hex(color: Tuple[float]) -> str:
        """Turn a colour tuple of floats into a ``#rrggbb`` hex string.

        The final component (alpha, for an RGBA tuple) is ignored. Every other
        component is multiplied by 255, lowered by one when the product is not
        ``<= 255``, then rounded to an integer.
        """
        def to_channel(component: float) -> int:
            scaled = component * 255
            adjusted = scaled if scaled <= 255 else scaled - 1
            return int(round(adjusted))

        red_green_blue = tuple(to_channel(part) for part in color[:-1])
        return "#%02x%02x%02x" % red_green_blue
    def cmap_to_colors(cmap: str,
                       amount: int) -> List[str]:
        """Sample *amount* evenly spaced colours from a named colormap.

        Args:
            cmap: Name of a registered matplotlib colormap (e.g. ``"viridis"``).
            amount: Number of colours to sample across the range 0..1.

        Returns:
            A list of ``#rrggbb`` strings, one per sampled position.
        """
        # matplotlib.cm.get_cmap was deprecated in 3.7 and removed in 3.9;
        # pyplot.get_cmap (``mpl`` is the pyplot alias here) is still available.
        colormap = mpl.get_cmap(cmap)
        return [color_to_hex(colormap(position))
                for position in np.linspace(0, 1, amount)]
    
    def main() -> None:
        """Draw the stacked chart, colouring only each row's largest segment(s).

        Reads ``age_dist_median_six.xlsx``, drops the summary columns and draws
        one stacked horizontal bar per row. Segments equal to the row maximum
        keep their viridis colour; all other segments are drawn in grey. The
        figure is saved to ``stacked_six_viridis.png`` and then shown.
        """
        df = read_to_df("age_dist_median_six.xlsx")
        df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)
        # Reverse the row order (this is a reversal, not a transpose).
        df_age_only = df_age_only.iloc[::-1]
        colors = cmap_to_colors("viridis", 6)

        mpl.figure(figsize=(12, 12))
        N_teams = len(df_age_only)

        for i in range(N_teams):
            # Positional row access avoids integer lookups on a label index.
            row = df_age_only.iloc[i]
            # Computed once per row instead of once per segment.
            row_max = row.max()
            x_pos = 0
            for value, color in zip(row, colors):
                # Every segment tying for the row maximum keeps its colour.
                face = color if value == row_max else 'tab:grey'
                # left=0 for the first segment is the same as omitting it.
                mpl.barh(i, value, color=face, left=x_pos, align='center', edgecolor='k')
                x_pos += value

        mpl.yticks(np.arange(N_teams), df_age_only.index, fontsize=9)

        # Setting up legend: empty line plots act as colour swatches, because
        # the bars themselves are mostly grey.
        for color, label in zip(colors, df_age_only.columns):
            mpl.plot([], [], color=color, lw=10, label=str(label))
        mpl.legend()

        # Save before showing: after show() returns the figure may already be
        # closed, which would leave an empty image on disk.
        mpl.savefig("stacked_six_viridis.png")
        mpl.show()
    if __name__ == "__main__":
        main()
    

    输出给出:

    或者,如果您想保留原始颜色但突出显示具有最大值的条形,您可以通过改变 alpha 值来更改条形的透明度。见以下代码:

    import matplotlib.pyplot as mpl
    import matplotlib.cm as mcm
    import pandas as pd
    import numpy as np
    from typing import List, Tuple
    
    def read_to_df(file_path: str) -> pd.DataFrame:
        """Return the Excel sheet at *file_path* as a DataFrame.

        Column 0 of the sheet supplies the row index.
        """
        loaded = pd.read_excel(file_path, index_col=0)
        return loaded
    def color_to_hex(color: Tuple[float]) -> str:
        """Format a colour tuple of floats as a ``#rrggbb`` hex string.

        All components except the last one (alpha, for an RGBA tuple) are
        scaled by 255, decreased by one if the scaled value is not ``<= 255``,
        and rounded to the nearest integer before formatting.
        """
        scaled_parts = [part * 255 for part in color[:-1]]
        rounded_parts = []
        for scaled in scaled_parts:
            if scaled <= 255:
                rounded_parts.append(int(round(scaled)))
            else:
                rounded_parts.append(int(round(scaled - 1)))
        return "#%02x%02x%02x" % tuple(rounded_parts)
    def cmap_to_colors(cmap: str,
                       amount: int) -> List[str]:
        """Sample *amount* evenly spaced colours from a named colormap.

        Args:
            cmap: Name of a registered matplotlib colormap (e.g. ``"viridis"``).
            amount: Number of colours to sample across the range 0..1.

        Returns:
            A list of ``#rrggbb`` strings, one per sampled position.
        """
        # matplotlib.cm.get_cmap was deprecated in 3.7 and removed in 3.9;
        # pyplot.get_cmap (``mpl`` is the pyplot alias here) is still available.
        colormap = mpl.get_cmap(cmap)
        return [color_to_hex(colormap(position))
                for position in np.linspace(0, 1, amount)]
    
    def main() -> None:
        """Draw the stacked chart, fading every segment except each row's largest.

        Reads ``age_dist_median_six.xlsx``, drops the summary columns and draws
        one stacked horizontal bar per row. Every segment keeps its viridis
        colour; segments equal to the row maximum are opaque and all others are
        drawn with alpha 0.3. The figure is saved to
        ``stacked_six_viridis.png`` and then shown.
        """
        df = read_to_df("age_dist_median_six.xlsx")
        df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)
        # Reverse the row order (this is a reversal, not a transpose).
        df_age_only = df_age_only.iloc[::-1]
        colors = cmap_to_colors("viridis", 6)

        mpl.figure(figsize=(12, 12))
        N_teams = len(df_age_only)

        for i in range(N_teams):
            # Positional row access avoids integer lookups on a label index.
            row = df_age_only.iloc[i]
            # Computed once per row instead of once per segment.
            row_max = row.max()
            x_pos = 0
            for value, color in zip(row, colors):
                # Every segment tying for the row maximum stays fully opaque.
                opacity = 1 if value == row_max else 0.3
                # left=0 for the first segment is the same as omitting it.
                mpl.barh(i, value, color=color, left=x_pos, align='center',
                         edgecolor='tab:grey', alpha=opacity)
                x_pos += value

        mpl.yticks(np.arange(N_teams), df_age_only.index, fontsize=9)

        # Setting up legend: empty line plots act as fully opaque colour swatches.
        for color, label in zip(colors, df_age_only.columns):
            mpl.plot([], [], color=color, lw=10, label=str(label))
        mpl.legend()

        # Save before showing: after show() returns the figure may already be
        # closed, which would leave an empty image on disk.
        mpl.savefig("stacked_six_viridis.png")
        mpl.show()
    if __name__ == "__main__":
        main()
    

    还有输出:

    要突出显示条形,您还可以更改其他属性,例如 edgecolor 和 hatch。

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2021-10-04
      • 1970-01-01
      • 2019-12-12
      • 1970-01-01
      • 2017-09-17
      • 2021-12-10
      • 1970-01-01
      • 2022-11-22
      相关资源
      最近更新 更多