Search code examples
python matplotlib seaborn errorbar grouped-bar-chart

How can custom errorbars be aligned on grouped bars?


I have created a sns.catplot using seaborn. My goal is to obtain a barplot with error bars.

I followed this answer to add error bars to my plot. However, I now find that my error bars, drawn with the same ax.errorbar function, no longer align with the bars in my plot.

I appreciate any answers or comments as to why sorting my data frame has caused this issue.

import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns

# Sensitivity results in wide form: one row per (Parameter, Output) pair, with
# the two index values and their two error estimates as separate columns.
# Parameter names are mathtext labels; every parameter is listed once per output.
parameters = ['$μ_{max}$', '$m$', '$\\alpha_D$', '$N_{max}$', '$\\gamma_{cell}$', '$K_d$']
outputs = ['POC', 'DOC', 'IC', 'Cells', 'Mean']

# Each numeric column below is laid out one parameter per line, in the order of
# `parameters`, with the five values on a line following the order of `outputs`.
data = {
    'Parameter': [name for name in parameters for _ in outputs],
    'Output': outputs * len(parameters),
    'Total-effect': [
        0.9806103414992552, -7.054718234598588e-10, 0.1960778044402512, 0.2537531550865872, 0.3576103250801555,
        0.1663846098641205, 1.0851909901687566, 0.2563681021056311, 0.0084168031549801, 0.3790901263233721,
        0.0031054085922008, 0.0002724061050653, 0.1659030569337202, 0.2251452993113863, 0.0986065427355931,
        0.0340237460462674, 0.3067235088110348, 0.3150260538485233, 0.3349234507482945, 0.24767418986353,
        0.1938746960877987, -6.17103884336228e-07, 0.0041542186143554, 0.0032055759222461, 0.050308468380129,
        0.0417496162986251, 2.328088857274425e-09, 0.9483137697398172, 0.9881583951740854, 0.4945554458851541,
    ],
    'First-order': [
        0.7030107013984165, 2.266962154339895e-19, 0.0062233586910709, 0.001029343445717, 0.1775658508838011,
        0.0007896517048184, 0.7264368524472167, 0.0072701545157557, 0.0047752182357577, 0.1848179692258871,
        -2.123427373989929e-05, 2.395667282242805e-19, 0.0055179953736572, 0.0004377224837127, 0.0014836208959075,
        -1.509666411558862e-06, 6.068293373049956e-20, 0.0115237519530005, 0.0009532607225978, 0.0031188757522967,
        0.0117401346791109, 3.482140934635793e-24, 0.0015109239301033, -2.9803014832201013e-08, 0.0033127572015498,
        0.0015795893288074, 3.393882814623132e-17, 0.3451307225252993, 0.4106729024860886, 0.1893458035850488,
    ],
    'Total Error': [
        0.0005752772018327, 1.3690325778564916e-09, 0.0033197127516203, 0.0042203628326116, 0.0020288385387743,
        0.0007817126652407, 0.074645390474463, 0.0016832816591233, 0.0023529269720789, 0.0198658279427265,
        0.0001233951911322, 0.0023340612253369, 0.0029383350061101, 0.003741247467092, 0.0022842597224178,
        0.0005740976276596, 0.1017075201238418, 0.0016784578928217, 0.0037270295879161, 0.0269217763080598,
        0.0009021103063017, 4.619682769520493e-07, 0.0005201826302926, 0.0005615428740041, 0.0004960744447188,
        0.000910170372727, 1.0571905831111963e-09, 0.0029389557787801, 0.0054832440706334, 0.0023330928198327,
    ],
    'First Error': [
        0.0024072925459877, 9.366089709991011e-20, 0.0002667351219131, 0.0002702376243862, 0.0007360663230718,
        0.0002586411466273, 0.0409234887280223, 0.0005053286335856, 0.0003348751699561, 0.0105055834195478,
        2.195881790893627e-05, 8.208495135059976e-20, 0.0001643584459509, 0.0002162523113349, 0.0001006423937987,
        0.0001928274220008, 3.4836161809305005e-20, 0.0005126354796536, 0.0005972681850905, 0.0003256827716862,
        0.0003252835339205, 5.013811598030501e-24, 3.247452070080876e-05, 8.972262407759052e-08, 8.946194431135658e-05,
        0.0001221659592046, 2.8775799201024936e-18, 0.0033817071114312, 0.0058875798799757, 0.0023478632376529,
    ],
}
df = pd.DataFrame(data)

# Outputs to show; the loop further down pairs each facet axis with one entry.
show_vars = ["Mean"]

# Reshape the errors and the index values to long form (one row per
# Parameter/Output/column), then order the rows of both frames by Parameter.
err_df = df.melt(id_vars=["Parameter", "Output"], value_vars=["Total Error", "First Error"], var_name="Error").sort_values(by="Parameter")
df = df.melt(id_vars=["Parameter", "Output"], value_vars=["Total-effect", "First-order"], var_name="Sobol index", value_name="Value").sort_values(by="Parameter")

# Plot: one facet per selected output, bars grouped by Parameter and split by Sobol index
grid = sns.catplot(data=df[df["Output"].isin(show_vars)], x="Parameter", y="Value", col="Output", col_wrap=2,
                   hue="Sobol index", kind="bar", aspect=1.8, legend_out=False)

grid.set_titles(col_template="Sensitivity with respect to {col_name}")

# Add error lines and values
for ax, var in zip(grid.axes.ravel(), show_vars):
    # Value labels: only bar containers are labelled; heights below 0.01 are shown as "<0.01"
    for c in ax.containers:
        if isinstance(c, matplotlib.container.BarContainer):
            ax.bar_label(c, labels=[f'{v.get_height():.2f}' if v.get_height() >= 0.01 else "<0.01" for v in c],
                         label_type='center')

    # Error bars
    ticklocs = ax.xaxis.get_majorticklocs()
    offset = ax.containers[0][0].get_width() / 2  # half the width of one bar
    # x holds every (tick - offset) position first and every (tick + offset)
    # position second, so y and yerr have to be ordered by Sobol index first and
    # by Parameter second to land on the matching bars.
    # NOTE(review): both frames were sorted by Parameter only, which interleaves
    # the two Sobol indices per parameter instead. This is the misalignment the
    # question asks about, so it is left in place to keep the snippet reproducing it.
    ax.errorbar(x=np.append(ticklocs - offset, ticklocs + offset), y=df[df["Output"] == var]["Value"],
                yerr=err_df[err_df["Output"] == var]["value"], ecolor='black', linewidth=0, elinewidth=2, capsize=2)  # Careful: array order matters

    # Change title for mean
    if var == "Mean":
        ax.set_title("Average sensitivity across outputs")

grid.tight_layout()

Output: enter image description here

I did try to sort the selected data frames by doing:

y=df[df["Output"] == var].sort_values(by="Parameter")["Value"], yerr=err_df[err_df["Output"] == var].sort_values(by="Parameter")["value"]

I tried this even though the row order in the data frame seems to be preserved across operations.


Solution

    • seaborn is a high-level API for matplotlib, and pandas uses matplotlib as the default plotting backend. Both packages work with matplotlib in different ways, which makes certain types of plots and customizations easier.
    • Tested in python 3.11, pandas 1.5.2, matplotlib 3.6.2, seaborn 0.12.1
    import matplotlib as mpl
    import pandas as pd
    
    # set the index as the column to be the x-axis
    # (this needs the wide-format df built from `data`, i.e. with the
    # 'Total-effect', 'First-order', 'Total Error' and 'First Error' columns)
    df = df.set_index('Parameter')

    # select the Mean data
    df_mean = df[df.Output.eq('Mean')]

    # specify the columns to use for the errors
    yerr = df_mean[['Total Error', 'First Error']]

    # the columns must be the same name as the columns used for the data values
    yerr.columns = ['Total-effect', 'First-order']

    # plot the selected data and add the yerr
    ax = df_mean.plot(kind='bar', y=['Total-effect', 'First-order'], yerr=yerr, rot=0, figsize=(12, 8), title='Average sensitivity across outputs')

    # iterate through each group of bars
    for c in ax.containers:
        # add labels to the bars; any container that is not a bar container
        # (presumably the error-bar containers created by yerr) is skipped
        if isinstance(c, mpl.container.BarContainer):
            # heights below 0.01 are shown as "<0.01" instead of "0.00"
            labels = [f'{h:.2f}' if (h := v.get_height()) >= 0.01 else "<0.01" for v in c]
            ax.bar_label(c, labels=labels, label_type='center')
    

    enter image description here