Search code examples
python pandas seaborn

Customize the spacing between seaborn grouped box plots


I have 3 main groups (A, B, C), and each group is subdivided in two (1 and 2). I want to draw box plots where the spacing between the main groups is larger than the spacing between the sub-groups within a group. I have the following code, but while I can adjust the x tick labels, I cannot adjust the spacing between the box plots to line up with them.

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Create the NumPy arrays: one array of 20 random integers per configuration
# (main group A/B/C, sub-group 1/2), each drawn from a different range
FA_1 = np.random.randint(200, 230, 20)
FA_2 = np.random.randint(180, 210, 20)
FB_1 = np.random.randint(130, 160, 20)
FB_2 = np.random.randint(140, 170, 20)
FC_1 = np.random.randint(80, 110, 20)
FC_2 = np.random.randint(60, 90, 20)

# Create a list of (samples, configuration label) tuples for the DataFrame
data = [
    (FA_1, 'A_1'), (FA_2, 'A_2'),
    (FB_1, 'B_1'), (FB_2, 'B_2'),
    (FC_1, 'C_1'), (FC_2, 'C_2')
]

# Create the DataFrame; at this point every row holds a whole array in 'Force'
# and its label in 'Config'
df = pd.DataFrame(data, columns=['Force', 'Config'])

# Explode the 'Force' column to stack the NumPy arrays, so that each sample
# ends up on its own row next to its 'Config' label
df = df.explode('Force', ignore_index=True)

# Create a box plot using Seaborn (the attempt at custom spacing follows)
plt.figure(figsize=(10, 6))

# Intended x position for each configuration.
# NOTE: these are the consecutive integers 0-5 and the mapping is only used in
# the plt.xticks call below; it is never passed to sns.boxplot, so it has no
# influence on where the boxes are drawn.
positions = {'A_1': 0, 'A_2': 1, 'B_1': 2, 'B_2': 3, 'C_1': 4, 'C_2': 5}

# One box per configuration, in the explicit A_1 ... C_2 order
sns.boxplot(x='Config', y='Force', data=df, order=['A_1', 'A_2', 'B_1', 'B_2', 'C_1', 'C_2'], width=0.5)
plt.title('Box Plot of Force by Configuration')
plt.xlabel('Configuration')
plt.ylabel('Force')
# Place a tick label at each mapped position; as described in the question,
# this only affects the tick labels, not the spacing between the boxes
plt.xticks(ticks=[positions[config] for config in df['Config'].unique()], labels=df['Config'].unique())
plt.tight_layout()
plt.show()

Solution

  • For grouping, Seaborn works with a hue parameter. You can split the Config column into explicit groups and subgroups such that the groups can be used for x and the subgroups for hue.

    import matplotlib.pyplot as plt
    import seaborn as sns
    import pandas as pd
    import numpy as np
    
    # (low, high, configuration label) for every group/subgroup combination;
    # the arrays are drawn in this order, 20 samples each
    sample_specs = [
        (200, 230, 'A_1'), (180, 210, 'A_2'),
        (130, 160, 'B_1'), (140, 170, 'B_2'),
        (80, 110, 'C_1'), (60, 90, 'C_2'),
    ]
    
    # Build one row per configuration (array + label), then explode 'Force'
    # so that every sample sits on its own row
    rows = [(np.random.randint(low, high, 20), label)
            for low, high, label in sample_specs]
    df = pd.DataFrame(rows, columns=['Force', 'Config'])
    df = df.explode('Force', ignore_index=True)
    
    # Split labels such as 'A_1' into the main group ('A') and subgroup ('1')
    config_parts = df['Config'].str.split(pat="_", expand=True)
    df['Group'] = config_parts[0]
    df['Subgroup'] = config_parts[1]
    
    # Grouped box plot: main groups along x, subgroups distinguished by hue
    plt.figure(figsize=(10, 6))
    
    ax = sns.boxplot(
        data=df,
        x='Group', order=['A', 'B', 'C'],
        y='Force',
        hue='Subgroup', hue_order=['1', '2'],
        palette='spring',
    )
    
    ax.set(
        title='Box Plot of Force by Configuration',
        xlabel='Configuration',
        ylabel='Force',
    )
    plt.tight_layout()
    plt.show()
    

    seaborn boxplot with grouping

    To get more spacing, you could use sns.catplot(), the figure-level version of sns.boxplot, and create one subplot per group, e.g.

    # One subplot (column) per main group, with the subgroups on each x axis.
    # `palette` is paired with an explicit `hue`: seaborn 0.13 deprecates
    # passing `palette` without `hue`. Mapping `hue` to the x variable,
    # turning off dodging and hiding the redundant legend keeps the same
    # one-colour-per-subgroup look.
    g = sns.catplot(kind='box',
                    x='Subgroup', order=['1', '2'],
                    hue='Subgroup', hue_order=['1', '2'],
                    dodge=False, legend=False,
                    col='Group', col_order=['A', 'B', 'C'],
                    y='Force', data=df, palette='spring')