Search code examples
python, pandas, matplotlib, seaborn, subplot

How to do multi-row layout using matplotlib subplots


This is a question about how to properly organize subplots, not how to create stacked bars.

I have the following dataframe:

     corpus group  mono p  non p  plus p  minus p
0  fairview   all      49     51      49        0
1      i2b2   all      46     54      46        0
2    mipacq   all      44     56      43        1

and want to arrange the output shown in the two attached figures so that I get n columns and 2 rows in a single figure, instead of two separate figures with 1 row each (so in this case, there would be 2 rows and 3 columns of subplots in a single figure, instead of 1 row and 3 columns of subplots in each of 2 figures):

Figure 11 - #1

Figure 11 - #2

I am generating these two figures as separate subplots using the following code:

# Question code: draws one figure per semantic group, with one bar-chart
# subplot per dataframe row (i.e. per corpus), arranged in a single row.
data = <above dataframe>
semgroups = ['all']
corpus = ['fairview', 'i2b2', 'mipacq']
for sg in semgroups:

    # figure number shown in the suptitle: position of the group plus 7
    i = semgroups.index(sg)
    ix = i + 7

    # number of distinct corpora / groups in the dataframe
    ncols = len(set(data.corpus.tolist()))
    # NOTE: nrows is computed here but not used below; the subplots call
    # hard-codes a single row.
    nrows = len(set(data.group.tolist()))

    # NOTE: this figure handle is overwritten by the plt.subplots call below.
    fig = plt.figure()
    
    # one row of ncols axes sharing the y axis
    fig, axs = plt.subplots(1, ncols, sharey=True)

    # pair each axes with one dataframe row
    for ax,(idx,row) in zip(axs.flat, data.iterrows()):
        # I WANT TO PLOT BOTH ROWS on same subplot
        #row[['mono p', 'non p']].plot.bar(ax=ax, color=['C0','C1'])
        row[['plus p', 'minus p']].plot.bar(ax=ax, color=['C0','C1'])
        # choose the display name and panel label for the title;
        # NOTE: this rebinds `corpus` (the list defined above) to a string.
        if row['corpus'] == 'fairview':
            corpus = 'Fairview'
            label =  '(d) '
        elif row['corpus'] == 'mipacq':
            corpus = 'MiPACQ'
            # the panel label depends on how many columns are drawn
            if ncols == 3:
                label = '(f) '
            else:
                label = '(b) '
        else:
            # any other corpus value is titled as i2b2
            corpus = 'i2b2'
            label = '(e) '
        
        ax.set_title(label + corpus)
        ax.tick_params(axis='x', labelrotation = 45)
    
    # human-readable group name for the figure title
    if sg == 'all':
        sg = 'All groups'

    # Defining custom 'xlim' and 'ylim' values.
    custom_ylim = (0, 60)

    # Setting the values for all axes.
    plt.setp(axs, ylim=custom_ylim)
    fig.suptitle('Figure ' + str(ix) + ' ' + sg)

In the code above, I iterate through my df grabbing the following rows to generate both separate subplots:

# BUT, I WANT TO PLOT BOTH ROWS ON SAME SUBPLOT
# (excerpt from the loop above: `row` is one dataframe row, `ax` one axes;
# both calls below are given the same `ax`)
row[['mono p', 'non p']].plot.bar(ax=ax, color=['C0','C1'])
row[['plus p', 'minus p']].plot.bar(ax=ax, color=['C0','C1'])

No matter how I do this, I cannot get the desired two rows in a single subplot (I always get an empty row of plots with no data on the second row).


Solution

    • See inline comments
    • Tested in python 3.8.12, pandas 1.3.3, matplotlib 3.4.3, seaborn 0.11.2
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns  # seaborn is a high-level api for matplotlib
    
    # sample dataframe: one row per corpus, one column per bar value
    data = {'corpus': ['fairview', 'i2b2', 'mipacq'], 'group': ['all', 'all', 'all'], 'mono p': [49, 46, 44], 'non p': [51, 54, 56], 'plus p': [49, 46, 43], 'minus p': [0, 0, 1]}
    df = pd.DataFrame(data)

    # .unique() returns NumPy arrays, which have no list-style .index() method,
    # so positions are obtained with enumerate() below
    semgroups = df.group.unique()  # unique groups
    corpus = df.corpus.unique()  # unique corpus
    rows = [['mono p', 'non p'], ['plus p', 'minus p']]  # columns for each row of plots

    ncols = len(corpus)  # 3 columns for the example
    nrows = len(rows)  # 2 rows for the example

    for i, sg in enumerate(semgroups):

        ix = i + 7  # figure number shown in the suptitle

        # create a figure with 2 rows of 3 columns: axes is a 2x3 array of <AxesSubplot:>
        # squeeze=False keeps axes two-dimensional even when there is only one
        # column (or row), so the nested iteration below always works
        fig, axes = plt.subplots(nrows, ncols, sharey=True, figsize=(12, 10), squeeze=False)

        # iterate through each plot row combined with a list from rows
        for axe, row in zip(axes, rows):
            # iterate through each plot column of the current row
            for col, ax in enumerate(axe):
                name = corpus[col]

                # select the data for each plot (kept separate from the
                # source dict `data` so that it is not overwritten)
                plot_data = df.loc[df.group.eq(sg) & df.corpus.eq(name), row]

                # plotting with pandas also works, but setting the bar color
                # is more difficult: plot_data.T.plot(kind='bar', legend=False, ax=ax)

                # plot the data with seaborn, which is easier to color the bars
                sns.barplot(data=plot_data, ax=ax)

                # panel label and display name used in the subplot title
                if name == 'fairview':
                    l2 = 'Fairview'
                    l1 = '(d) '
                elif name == 'mipacq':
                    l2 = 'MiPACQ'
                    # the panel label depends on how many columns are drawn
                    l1 = '(f) ' if ncols == 3 else '(b) '
                else:
                    l2 = 'i2b2'
                    l1 = '(e) '

                ax.set_title(l1 + l2)
                ax.tick_params(axis='x', labelrotation=45)

        # human-readable group name for the figure title (the loop variable
        # itself is left untouched)
        group_title = 'All groups' if sg == 'all' else sg

        # Defining custom 'xlim' and 'ylim' values.
        custom_ylim = (0, 60)

        # Setting the values for all axes.
        plt.setp(axes, ylim=custom_ylim)
        fig.suptitle(f'Figure {ix} {group_title}')
        fig.tight_layout()
        plt.show()
    

    enter image description here