I have created a sns.catplot
using seaborn. My goal is to obtain a barplot with error bars.
I followed this answer to add error bars to my plot. However, after sorting my data frame, I find that my error bars, drawn with the same ax.errorbar
function, no longer align with the bars of my plot.
I appreciate any answers or comments as to why sorting my data frame has caused this issue.
import pandas as pd
import matplotlib
import seaborn as sns
data = {'Parameter': ['$μ_{max}$', '$μ_{max}$', '$μ_{max}$', '$μ_{max}$', '$μ_{max}$', '$m$', '$m$', '$m$', '$m$', '$m$', '$\\alpha_D$', '$\\alpha_D$', '$\\alpha_D$', '$\\alpha_D$', '$\\alpha_D$', '$N_{max}$', '$N_{max}$', '$N_{max}$', '$N_{max}$', '$N_{max}$', '$\\gamma_{cell}$', '$\\gamma_{cell}$', '$\\gamma_{cell}$', '$\\gamma_{cell}$', '$\\gamma_{cell}$', '$K_d$', '$K_d$', '$K_d$', '$K_d$', '$K_d$'],
'Output': ['POC', 'DOC', 'IC', 'Cells', 'Mean', 'POC', 'DOC', 'IC', 'Cells', 'Mean', 'POC', 'DOC', 'IC', 'Cells', 'Mean', 'POC', 'DOC', 'IC', 'Cells', 'Mean', 'POC', 'DOC', 'IC', 'Cells', 'Mean', 'POC', 'DOC', 'IC', 'Cells', 'Mean'],
'Total-effect': [0.9806103414992552, -7.054718234598588e-10, 0.1960778044402512, 0.2537531550865872, 0.3576103250801555, 0.1663846098641205, 1.0851909901687566, 0.2563681021056311, 0.0084168031549801, 0.3790901263233721, 0.0031054085922008, 0.0002724061050653, 0.1659030569337202, 0.2251452993113863, 0.0986065427355931, 0.0340237460462674, 0.3067235088110348, 0.3150260538485233, 0.3349234507482945, 0.24767418986353, 0.1938746960877987, -6.17103884336228e-07, 0.0041542186143554, 0.0032055759222461, 0.050308468380129, 0.0417496162986251, 2.328088857274425e-09, 0.9483137697398172, 0.9881583951740854, 0.4945554458851541],
'First-order': [0.7030107013984165, 2.266962154339895e-19, 0.0062233586910709, 0.001029343445717, 0.1775658508838011, 0.0007896517048184, 0.7264368524472167, 0.0072701545157557, 0.0047752182357577, 0.1848179692258871, -2.123427373989929e-05, 2.395667282242805e-19, 0.0055179953736572, 0.0004377224837127, 0.0014836208959075, -1.509666411558862e-06, 6.068293373049956e-20, 0.0115237519530005, 0.0009532607225978, 0.0031188757522967, 0.0117401346791109, 3.482140934635793e-24, 0.0015109239301033, -2.9803014832201013e-08, 0.0033127572015498, 0.0015795893288074, 3.393882814623132e-17, 0.3451307225252993, 0.4106729024860886, 0.1893458035850488],
'Total Error': [0.0005752772018327, 1.3690325778564916e-09, 0.0033197127516203, 0.0042203628326116, 0.0020288385387743, 0.0007817126652407, 0.074645390474463, 0.0016832816591233, 0.0023529269720789, 0.0198658279427265, 0.0001233951911322, 0.0023340612253369, 0.0029383350061101, 0.003741247467092, 0.0022842597224178, 0.0005740976276596, 0.1017075201238418, 0.0016784578928217, 0.0037270295879161, 0.0269217763080598, 0.0009021103063017, 4.619682769520493e-07, 0.0005201826302926, 0.0005615428740041, 0.0004960744447188, 0.000910170372727, 1.0571905831111963e-09, 0.0029389557787801, 0.0054832440706334, 0.0023330928198327],
'First Error': [0.0024072925459877, 9.366089709991011e-20, 0.0002667351219131, 0.0002702376243862, 0.0007360663230718, 0.0002586411466273, 0.0409234887280223, 0.0005053286335856, 0.0003348751699561, 0.0105055834195478, 2.195881790893627e-05, 8.208495135059976e-20, 0.0001643584459509, 0.0002162523113349, 0.0001006423937987, 0.0001928274220008, 3.4836161809305005e-20, 0.0005126354796536, 0.0005972681850905, 0.0003256827716862, 0.0003252835339205, 5.013811598030501e-24, 3.247452070080876e-05, 8.972262407759052e-08, 8.946194431135658e-05, 0.0001221659592046, 2.8775799201024936e-18, 0.0033817071114312, 0.0058875798799757, 0.0023478632376529]}
df = pd.DataFrame(data)
# Picks outputs to show
show_vars = ["Mean"]
# Each plotted Sobol index and the column that holds its error.
# The key order is also the hue order used for the bars.
error_for_index = {"Total-effect": "Total Error", "First-order": "First Error"}
hue_order = list(error_for_index)
# One explicit, sorted category order shared by the bars and the error bars,
# so alignment never depends on the row order of the data frames.
param_order = sorted(df["Parameter"].unique())
err_df = df.melt(id_vars=["Parameter", "Output"], value_vars=["Total Error", "First Error"], var_name="Error").sort_values(by="Parameter")
df = df.melt(id_vars=["Parameter", "Output"], value_vars=["Total-effect", "First-order"], var_name="Sobol index", value_name="Value").sort_values(by="Parameter")
# Plot; order / hue_order / col_order pin the layout instead of relying on
# the order in which values happen to appear after sorting.
grid = sns.catplot(data=df[df["Output"].isin(show_vars)], x="Parameter", y="Value", col="Output", col_wrap=2,
                   order=param_order, hue_order=hue_order, col_order=show_vars,
                   hue="Sobol index", kind="bar", aspect=1.8, legend_out=False)
grid.set_titles(col_template="Sensitivity with respect to {col_name}")
# Add error lines and values
for ax, var in zip(grid.axes.ravel(), show_vars):
    # Errors of this output, addressable by (error column, parameter)
    errors = err_df[err_df["Output"] == var].set_index(["Error", "Parameter"])["value"]
    # Snapshot the bar groups first: ax.errorbar() appends its own container
    # to ax.containers while we iterate.
    bar_groups = [c for c in ax.containers if isinstance(c, matplotlib.container.BarContainer)]
    # NOTE(review): relies on seaborn drawing one bar container per hue level,
    # in hue_order, with bars in param_order - confirm for your seaborn version.
    for index_name, bars in zip(hue_order, bar_groups):
        # Value labels
        ax.bar_label(bars, labels=[f'{v.get_height():.2f}' if v.get_height() >= 0.01 else "<0.01" for v in bars],
                     label_type='center')
        # Error bars: x and y are read from the drawn bars themselves, and the
        # error is looked up by parameter name, so all three always agree.
        ax.errorbar(x=[v.get_x() + v.get_width() / 2 for v in bars],
                    y=[v.get_height() for v in bars],
                    yerr=errors.loc[error_for_index[index_name]].reindex(param_order).to_numpy(),
                    ecolor='black', linewidth=0, elinewidth=2, capsize=2)
    # Change title for mean
    if var == "Mean":
        ax.set_title("Average sensitivity across outputs")
grid.tight_layout()
I did try to sort the selected dataframes by doing:
y=df[df["Output"] == var].sort_values(by="Parameter")["Value"], yerr=err_df[err_df["Output"] == var].sort_values(by="Parameter")["value"]
This did not fix the alignment, even though the order in the data frame seems to be preserved across operations.
seaborn is a high-level API for matplotlib, and pandas uses matplotlib
as the default plotting backend. Both packages work with matplotlib
in different ways, which makes certain types of plots and customizations easier.
seaborn.barplot
automatically aggregates data and adds error bars. However, since this data is already aggregated and has columns containing the errors, it's easier to add the errors with pandas.DataFrame.plot
and the yerr
parameter.
Tested in python 3.11
, pandas 1.5.2
, matplotlib 3.6.2
, seaborn 0.12.1
import matplotlib as mpl
import pandas as pd
# set the index as the column to be the x-axis
df = df.set_index('Parameter')
# select the Mean data
df_mean = df[df.Output.eq('Mean')]
# specify the columns to use for the errors; they are renamed because the
# error columns must carry the same names as the columns used for the data values
yerr = df_mean[['Total Error', 'First Error']].rename(
    columns={'Total Error': 'Total-effect', 'First Error': 'First-order'})
# plot the selected data and add the yerr
ax = df_mean.plot(kind='bar', y=['Total-effect', 'First-order'], yerr=yerr, rot=0, figsize=(12, 8), title='Average sensitivity across outputs')
# iterate through each group of bars
for c in ax.containers:
    # only bar containers can be labelled; skip any other container type
    if isinstance(c, mpl.container.BarContainer):
        # small values are shown as "<0.01" instead of a rounded "0.00"
        labels = [f'{h:.2f}' if (h := v.get_height()) >= 0.01 else "<0.01" for v in c]
        ax.bar_label(c, labels=labels, label_type='center')