Search code examples
python, matplotlib, plot, histogram, plot-annotations

Plot text boxes and fill colors between vertical lines in Matplotlib (Python)


Based on another thread, I got this code:

import numpy as np
import scipy
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt

# The imports must come before the first use of `np` / `pd`; with the sample
# created above them the script fails with a NameError when run top to bottom.

# 2000 draws from a normal distribution (loc=0.0, scale=1.0), wrapped in a
# single-column DataFrame (column label 0) that is passed to the plotting code.
data = np.random.normal(loc=0.0, scale=1.0, size=2000)
df_data = pd.DataFrame(data)


def _plot(df):
    """Draw an annotated density histogram for every column of ``df``.

    For each column a new 12x6 figure is created containing:

    * a 50-bin density histogram of the column,
    * the normal curve built from the column's mean and standard deviation,
      evaluated at the ``bins`` values returned by ``hist``,
    * blue dash-dot lines at ``mu + k * sigma`` for ``k = -3 .. 3``,
    * red dash-dot lines at the column minimum and maximum.

    The column mean and standard deviation are printed after each figure is
    built. Nothing is returned.

    Args:
        df: DataFrame whose columns are plotted, one figure per column.
    """
    for col in df.columns:
        # Work on the column being plotted rather than on a module-level
        # frame, so the markers always match the histogram they sit on.
        values = df[col]
        n_bins = 50
        _, axes = plt.subplots(figsize=(12, 6))
        _, bins, _ = axes.hist(values, n_bins, density=True, alpha=.1, edgecolor='black')
        mu = values.mean()
        sigma = values.std()
        pdf = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2))

        # probability density function
        axes.plot(bins, pdf, color='green', alpha=.6)

        # dashed lines: mean and +/- 1, 2, 3 standard deviations in blue
        for k in range(-3, 4):
            axes.axvline(mu + k * sigma, color='b', linestyle='-.')

        # dashed lines: observed extremes in red
        for extreme in (values.min(), values.max()):
            axes.axvline(extreme, color='r', linestyle='-.')

        axes.set_ylabel('Probability Density')
        axes.set_xlabel('Values')

        print(mu)
        print(sigma)

# Build one annotated histogram figure per column of the sample DataFrame.
_plot(df_data)

This produces the following plot: enter image description here

As you can see the blue vertical lines indicate borders set by multiples of standard deviations. I would like to add the following information and color coding, which I now quickly placed in powerpoint:

enter image description here

I experimented with the plt.fill_between function but did not get anything useful. I also do not know how to write a label, such as the mu+1*sigma shown here, above the plot. How can I achieve the second picture based on what I have?

EDIT: solved by @Trenton McKinney

enter image description here Putting new boxes inside the colored boxes:

# Shade every interval between neighbouring vertical lines, caption it, and
# add a boxed "mu +/- k*sigma" label. Relies on `locs` (sorted (x, colour)
# pairs), `axes` and `y1` (upper y-limit) from the surrounding plotting code.
for i, (x, c) in enumerate(locs[:-1]):
    right = locs[i + 1][0]
    axes.axvspan(x, right, alpha=0.2, color=c)
    tx = (x + right) / 2
    axes.text(tx, y1/2, f'Zustand {i + 1}', {'ha': 'center', 'va': 'center'}, rotation=90)

    # The first four intervals are labelled mu-4 .. mu-1, the following ones
    # mu+1, mu+2, ...
    sign, k = ('-', 4 - i) if i < 4 else ('+', i - 3)

    # Raw f-string: keeps the mathtext backslashes (\mu, \cdot, \sigma) literal
    # instead of relying on invalid escape sequences in normal string literals.
    label = rf"$\mu${sign}{k}$\cdot$$\sigma$"
    axes.text(tx, y1/1.25, label, {'ha': 'center', 'va': 'center'}, rotation=90,
              bbox=dict(facecolor='white', alpha=0.8, edgecolor='black'))

Solution

  • # extra imports
    from collections import OrderedDict
    from itertools import zip_longest
    
    # Seed NumPy's global random state so the sample, and therefore the
    # figure, is the same on every run.
    np.random.seed(2022)
    # 2000 normally distributed values (loc 0.0, scale 1.0) as a one-column frame.
    data = np.random.normal(0.0, 1.0, 2000)
    df_data = pd.DataFrame(data)
    
    
    def _plot(df):
        """Draw a density histogram with shaded, labelled sigma stages per column.

        For each column of ``df`` a new 12x6 figure is created containing a
        50-bin density histogram, the normal curve built from the column's
        mean and standard deviation, dash-dot vertical lines (blue at
        ``mu + k * sigma`` for ``k = -3 .. 3``, red at the column minimum and
        maximum) and alternately coloured spans between neighbouring lines,
        each captioned ``Stage <n>``.

        The column mean and standard deviation are printed after each figure
        is built. Nothing is returned.

        Args:
            df: DataFrame whose columns are plotted, one figure per column.
        """
        for col in df.columns:
            # Use the column being plotted, not a module-level frame, so the
            # lines and stages always belong to the histogram they annotate.
            values = df[col]
            n_bins = 50
            _, axes = plt.subplots(figsize=(12, 6))
            _, bins, _ = axes.hist(values, n_bins, density=True, alpha=.1, edgecolor='black')
            mu = values.mean()
            sigma = values.std()
            pdf = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2))

            # probability density function
            axes.plot(bins, pdf, color='green', alpha=.6)

            # upper y-limit, used to position the stage text at half height
            _, y1 = axes.get_ylim()

            # map every vertical-line x position to its line colour; built with
            # plain dict operations so no Python 3.9+ `|` merge is required.
            # On coinciding positions the later (blue) entry wins.
            vals = {mu: 'b', values.min(): 'r', values.max(): 'r'}
            vals.update({mu - v * sigma: 'b' for v in range(1, 4)})
            vals.update({mu + v * sigma: 'b' for v in range(1, 4)})

            # x positions from smallest to largest
            xs = sorted(vals)

            # plot the dashed lines
            for x in xs:
                axes.axvline(x, color=vals[x], linestyle='-.')

            # pair each x position with the colour of the stage starting there;
            # the colour attached to the last position is never used
            stage_colors = ('blue', 'brown')
            locs = [(x, stage_colors[i % 2]) for i, x in enumerate(xs)]

            # iterate through all but the last value, and add the vspan and the text
            for i, (x, c) in enumerate(locs[:-1]):
                right = locs[i + 1][0]
                axes.axvspan(x, right, alpha=0.2, color=c)
                tx = (x + right) / 2
                axes.text(tx, y1/2, f'Stage {i + 1}', {'ha': 'center', 'va': 'center'}, rotation=90)

            axes.set_ylabel('Probability Density')
            axes.set_xlabel('Values')

            print(mu)
            print(sigma)
    
        
    # Build the annotated figure(s) for the seeded sample DataFrame.
    _plot(df_data)
    

    enter image description here

    Update for additional annotations

            # extra annotations: one "µ ± kσ" caption per stage, ordered left to
            # right (µ - 4σ … µ - 1σ, then µ + 1σ … µ + 4σ)
            below_mean = [f'µ - {v}σ' for v in (4, 3, 2, 1)]
            above_mean = [f'µ + {v}σ' for v in (1, 2, 3, 4)]
            anno = [*below_mean, *above_mean]

            # walk over neighbouring entries of locs pairwise; each pair bounds
            # one stage, which is shaded and captioned at its horizontal centre
            for i, ((x, c), (x_next, _)) in enumerate(zip(locs, locs[1:])):
                axes.axvspan(x, x_next, alpha=0.2, color=c)
                tx = (x + x_next) / 2
                caption = f'Stage {i + 1}: {anno[i]}'
                axes.text(tx, y1/2, caption, {'ha': 'center', 'va': 'center'}, rotation=90)
    

    enter image description here