Search code examples
python numpy matplotlib histogram grouped-bar-chart

How to calculate sum and average bars for every histogram bin


I would like to plot stacked bar charts. The first 3 bars are red, black and blue as shown below.

I want to add a fourth bar that is the 'sum' of the values of red, black and blue bars.

(In another case I want to add a fourth bar that is the 'average' of red, black and blue bars.)

For example, this is my code:

import numpy as np
import matplotlib.pyplot as plt

# seed the global RNG so every run draws the same sample
np.random.seed(19680801)

# 1000 random values for each of the three series (one series per column)
x = np.random.randn(1000, 3)
n_bins = 20

colors = ['red', 'black', 'blue']

# a single axes: nrows and ncols both default to 1
fig, ax0 = plt.subplots()

# density histogram of every column on the same axes;
# the color names are reused as the legend labels
ax0.hist(
    x,
    bins=n_bins,
    density=True,
    histtype='bar',
    color=colors,
    label=colors,
)
ax0.set_title('bars with legend')
ax0.legend(prop={'size': 10})

fig.tight_layout()
plt.show()

This figure is without the 4th bar. enter image description here

Any suggestions on how I can show the 4th bar in this stacked bar chart?


Solution

  • Use np.histogram to calculate hist and bin_edges for each group, calculate the sum and mean of each group, and plot bar plots with the absolute bar height for each bin.

    The easiest way to plot grouped, or stacked bars, is from a pandas.DataFrame with pandas.DataFrame.plot.

    Tested in python 3.11.2, pandas 2.0.1, matplotlib 3.7.1, numpy 1.24.3

    import numpy as np
    import pandas as pd
    
    # reproducible sample data: 1000 random values for each of 3 samples (columns of x)
    np.random.seed(19680801)
    n_bins = 20
    x = np.random.randn(1000, 3)

    # one shared set of bin edges, derived from all of the values together
    be = np.histogram_bin_edges(x, bins=n_bins)

    # density histogram of each sample on the shared edges, keyed 's0', 's1', ...
    data = {}
    for i, col in enumerate(x.T):
        density, _ = np.histogram(col, bins=be, density=True)
        data[f's{i}'] = density

    # from here on the edges are only used as xtick labels, so round them
    be = be.round(1)

    # 2-D array with one row per bin and one column per sample
    per_bin = np.column_stack(list(data.values()))

    # per-bin total and mean across the samples, stored as two extra entries
    data['tot'] = per_bin.sum(axis=1)
    data['mean'] = per_bin.mean(axis=1)

    # one dataframe row per bin, one column per bar in the group
    df = pd.DataFrame(data)

    # grouped bar plot
    ax = df.plot.bar(width=0.85, ec='k', figsize=(11, 6), rot=0)

    # put the ticks 0.5 to the left of the integer positions and label them
    # with the rounded bin edges
    edge_positions = np.arange(len(be)) - 0.5
    ax.set_xticks(ticks=edge_positions, labels=be)

    # cosmetics: grid lines at the x ticks, no top/right spines, legend beside the axes
    ax.grid(axis='x')
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)
    _ = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False)
    

    enter image description here

    • Use .iloc, or y=['s0', 's1', 's2', 'mean'] to select the specific columns to plot.
    # plot columns 0, 1, 2 and 4 only; position 3 is skipped
    subset = df.iloc[:, [0, 1, 2, 4]]
    ax = subset.plot.bar(width=0.85, ec='k', figsize=(14, 6), rot=0)

    # cosmetics: ticks shifted left by 0.5 and labelled with the bin edges,
    # grid lines at the x ticks, no top/right spines, legend beside the axes
    edge_positions = np.arange(len(be)) - 0.5
    ax.set_xticks(ticks=edge_positions, labels=be)
    ax.grid(axis='x')
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)
    _ = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False)


    def bar_text(height):
        """Return the value with 3 decimals, or '' when it is not above 0.005."""
        return f'{height:0.3f}' if height > 0.005 else ''


    # label every bar at its edge with its value, rotated 90 degrees
    for bars in ax.containers:
        ax.bar_label(bars, fmt=bar_text, label_type='edge', fontsize=6, rotation=90, padding=3)

    # widen the y autoscale margin (presumably to leave room for the labels)
    ax.margins(y=0.15)
    

    enter image description here