Search code examples
python pandas matplotlib seaborn plot-annotations

Include notes in seaborn barplot


I have a bar plot, and I would like to add a note at the bottom of it. The current code is shown below:

import pandas as pd
import numpy as np
import seaborn as sns

import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter

# Sample survey data in long format: ids 1-22, each appearing once per
# survey round ('baseline' and 'endline') with a 'level' rating of
# 'low', 'medium' or 'high'.
data = {
'id': [1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22],
'survey': ['baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline', 'baseline', 'endline'],
'level': ['low', 'high', 'medium', 'low', 'high', 'medium', 'medium', 'high', 'low', 'low', 'medium', 'high', 'low', 'medium', 'low', 'high', 'low', 'low', 'medium', 'high', 'high', 'high', 'high', 'medium', 'low', 'low', 'medium', 'high', 'low', 'medium', 'high', 'medium', 'low', 'high', 'high', 'medium', 'medium', 'low', 'high', 'low', 'low', 'low', 'low', 'low']
}

# Draw a bar chart of the share of records per level, with each bar
# annotated as "<percent>% (N = <count>)".
df = pd.DataFrame(data)

# One row per level; after count() the 'id' column holds the group size.
df_N = df.groupby(['level']).count().sort_index(ascending = True).reset_index()
df_N['%'] = 100 * df_N['id'] / df_N['id'].sum()

sns.set_style('white')
ax = sns.barplot(data=df_N, x='level', y='%', ci=None,
                 palette="rainbow")

N = df_N['id'].to_numpy()
# Raw string: '\i' is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning. The resulting text is unchanged and is
# rendered by Matplotlib's mathtext as an italic N.
N_it = r'$\it{N}$'
# Pair each bar height (the percentage) with the count of its level.
labels=[f'{np.round(perc,1)}% ({N_it} = {n})' 
        for perc, n in zip(ax.containers[0].datavalues, N)]

ax.bar_label(ax.containers[0], labels = labels, fontsize = 10)

sns.despine(ax=ax, left=True)
ax.grid(True, axis='y')
# Values are already on a 0-100 scale, hence xmax=100.
ax.yaxis.set_major_formatter(PercentFormatter(100))
ax.set_xlabel('')
ax.set_ylabel('')
ax.margins(y=0.15)  # optionally some more free space at the top
# NOTE(review): no artist above is given a label, so this legend is
# presumably empty -- confirm whether it is still needed.
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
plt.tight_layout()
plt.show()

I would like the note to be based on the disaggregation below:

# Count records for every (survey, level) combination, ordered by the
# group keys, then turn the keys back into ordinary columns.
df_N = (
    df.groupby(['survey', 'level'])
    .count()
    .sort_index(ascending=True)
    .reset_index()
)
df_N

Specifically:

Note: Baseline: high - 6, low - 10, medium - 6 Endline: high - 8, low - 8, medium - 6


Solution

  • You can use the text() method of the Matplotlib Axes object (ax) that sns.barplot returns:

    # Count records per (survey, level) pair; after reset_index() the 'id'
    # column holds the size of each group.
    df_N = df.groupby(['survey', 'level']).count().sort_index(ascending = True).reset_index()
    # Build one "<survey>: <level> - <count>, ..." segment per survey round.
    segments = []
    for val in df_N['survey'].unique():
        rows = df_N.loc[df_N['survey'] == val, ['level', 'id']]
        pairs = ", ".join(
            f"{level} - {count}"
            for level, count in rows.itertuples(index=False)
        )
        segments.append(f"{val}: {pairs}")
    # Joining with ", " yields the same text as appending ", " after every
    # count and then trimming the trailing separator.
    txt = ", ".join(segments)
    # Position is in data coordinates: left of the first bar, below y = 0.
    ax.text(-0.4, -5, txt, fontsize=10)
    

    This is the result graph that I get: enter image description here

    Note: The code above was added before the plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0) line in your code.