Search code examples
Tags: python, plotly, plotly.graph-objects

Is there a way to overlay a scatter plot on grouped boxplots so that the points aren't offset, using plotly graph_objects?


I'm trying to get the scatter points to lie on top of their respective boxplots so that they act as outlier points. Since plotly's go.Box has no way of taking precalculated outliers as input, I've been trying to do it this way. For the purposes of this project, I don't want plotly to calculate the outliers itself. Is there any way to accomplish this by moving the scatter points, or is there perhaps a feature of go.Box I overlooked that can do this?

import plotly.graph_objects as go

def create_multiple_boxplots(summary_stats_list, labels, types, title="Multiple Boxplots"):
    """Show grouped boxplots built from precomputed statistics, with outlier markers.

    Args:
        summary_stats_list: One dict per box with the keys 'Min', 'Q1',
            'Median', 'Q3', 'Max' and 'Mean'. An optional 'Outliers' key
            holds the y-values to draw as separate scatter markers.
        labels: x-axis category of each box.
        types: Series name of each box. It selects the colour and the
            legend group, so it must be a key of ``color_map``.
        title: Figure title.

    Known limitation: the outlier markers are not shifted together with
    the grouped boxes, so they do not line up with their own box.
    """
    fig = go.Figure()

    color_map = {"Rainy": "blue", "Sunny": "green"}

    # One legend entry per type, shown on the first box of that type
    # (instead of assuming the first two boxes have different types).
    types_in_legend = set()
    for stats, label, type_ in zip(summary_stats_list, labels, types):
        # 'Outliers' is optional; None means "no outlier trace for this box".
        outliers = stats.get('Outliers')

        # NOTE(review): no sample y-values are passed to go.Box, so the
        # boxpoints/jitter/pointpos options presumably have nothing to
        # draw -- confirm before relying on them.
        fig.add_trace(go.Box(
            name=type_,
            q1=[stats['Q1']],
            median=[stats['Median']],
            q3=[stats['Q3']],
            lowerfence=[stats['Min']],
            upperfence=[stats['Max']],
            mean=[stats['Mean']],
            boxpoints='all' if outliers is not None else False,
            jitter=0.3,
            pointpos=-1.8,
            marker=dict(color=color_map[type_]),  # Assign color based on type
            legendgroup=type_,
            showlegend=type_ not in types_in_legend,
            x=[label],
        ))
        types_in_legend.add(type_)

        # Add outlier points separately, sharing the box's legend group so
        # they toggle together with it.
        if outliers is not None:
            fig.add_trace(go.Scatter(
                x=[label] * len(outliers),
                y=outliers,
                mode='markers',
                marker=dict(color=color_map[type_], size=8, symbol='circle-open'),
                name=f"Outliers - {type_}",
                legendgroup=type_,
                showlegend=False
            ))

    fig.update_layout(title=title, yaxis_title="Value", boxmode='group')

    fig.show()

# Demo input: one precomputed summary per box, in plotting order.
data_summaries = [
    dict(Min=5, Q1=10, Median=15, Q3=20, Max=25, Mean=16, Outliers=[2, 27]),
    dict(Min=6, Q1=11, Median=16, Q3=21, Max=26, Mean=17, Outliers=[3, 28]),
    dict(Min=4, Q1=9, Median=14, Q3=19, Max=24, Mean=15, Outliers=[1, 26]),
    dict(Min=7, Q1=12, Median=17, Q3=22, Max=27, Mean=18, Outliers=[4, 29]),
]
# Box i is drawn at x category labels[i] and belongs to series types[i].
labels = ["Happy", "Happy", "Sad", "Sad"]
types = ["Rainy", "Sunny", "Rainy", "Sunny"]

create_multiple_boxplots(
    summary_stats_list=data_summaries,
    labels=labels,
    types=types,
)

Boxplot with incorrectly aligned scatterplot points


Solution

  • The desired output can be obtained by dropping boxmode='group' and giving every box a unique x label. With only one box per x category, the box-and-whisker plot and its scatter points share the same x position, so the points land on their own box.

    import plotly.graph_objects as go
    
    def create_multiple_boxplots(summary_stats_list, labels, types, title="Multiple Boxplots"):
        """Show boxplots built from precomputed statistics, with outlier markers on top.

        This variant does not use ``boxmode='group'``; it relies on every
        box having its own unique x label so that a box and its outlier
        markers share the same x position.

        Args:
            summary_stats_list: One dict per box with the keys 'Min', 'Q1',
                'Median', 'Q3', 'Max' and 'Mean'. An optional 'Outliers' key
                holds the y-values to draw as separate scatter markers.
            labels: x-axis category of each box; should be unique per box.
            types: Series name of each box. It selects the colour and the
                legend group, so it must be a key of ``color_map``.
            title: Figure title.
        """
        fig = go.Figure()
    
        color_map = {"Rainy": "blue", "Sunny": "green"}
    
        # One legend entry per type, shown on the first box of that type
        # (instead of assuming the first two boxes have different types).
        types_in_legend = set()
        for stats, label, type_ in zip(summary_stats_list, labels, types):
            # 'Outliers' is optional; None means "no outlier trace for this box".
            outliers = stats.get('Outliers')
    
            # NOTE(review): no sample y-values are passed to go.Box, so the
            # boxpoints/jitter/pointpos options presumably have nothing to
            # draw -- confirm before relying on them.
            fig.add_trace(go.Box(
                name=type_,
                q1=[stats['Q1']],
                median=[stats['Median']],
                q3=[stats['Q3']],
                lowerfence=[stats['Min']],
                upperfence=[stats['Max']],
                mean=[stats['Mean']],
                boxpoints='all' if outliers is not None else False,
                jitter=0.3,
                pointpos=-1.8,
                marker=dict(color=color_map[type_]),  # Assign color based on type
                legendgroup=type_,
                showlegend=type_ not in types_in_legend,
                x=[label],
            ))
            types_in_legend.add(type_)
    
            # Outlier markers share the box's x label and legend group.
            if outliers is not None:
                fig.add_trace(go.Scatter(
                    x=[label] * len(outliers),
                    y=outliers,
                    mode='markers',
                    marker=dict(color=color_map[type_], size=8, symbol='circle-open'),
                    name=f"Outliers - {type_}",
                    legendgroup=type_,
                    showlegend=False
                ))
    
        # update: boxmode='group' is deliberately NOT set here, so boxes are
        # not shifted away from the x position their markers are drawn at.
        fig.update_layout(title=title, yaxis_title="Value")
    
        fig.show()
    
    # Demo input: one precomputed summary per box, in plotting order.
    data_summaries = [
        dict(Min=5, Q1=10, Median=15, Q3=20, Max=25, Mean=16, Outliers=[2, 27]),
        dict(Min=6, Q1=11, Median=16, Q3=21, Max=26, Mean=17, Outliers=[3, 28]),
        dict(Min=4, Q1=9, Median=14, Q3=19, Max=24, Mean=15, Outliers=[1, 26]),
        dict(Min=7, Q1=12, Median=17, Q3=22, Max=27, Mean=18, Outliers=[4, 29]),
    ]
    # update: every box gets its own x label (note the trailing underscores).
    labels = ["Happy", "Happy_", "Sad", "Sad_"]
    types = ["Rainy", "Sunny", "Rainy", "Sunny"]
    
    create_multiple_boxplots(
        summary_stats_list=data_summaries,
        labels=labels,
        types=types,
    )
    

    enter image description here

    To overlay the scatter points while keeping boxmode='group', assign the same offsetgroup to each box trace and to its matching scatter trace. The scatter points are then drawn at the center of their box-and-whisker plot.

    import plotly.graph_objects as go
    
    def create_multiple_boxplots(summary_stats_list, labels, types, title="Multiple Boxplots"):
        """Show grouped boxplots from precomputed statistics, with aligned outlier markers.

        Each box trace and its outlier scatter trace are given the same
        ``offsetgroup`` so that, with ``boxmode='group'``, the markers are
        drawn at the center of their own box.

        Args:
            summary_stats_list: One dict per box with the keys 'Min', 'Q1',
                'Median', 'Q3', 'Max' and 'Mean'. An optional 'Outliers' key
                holds the y-values to draw as separate scatter markers.
            labels: x-axis category of each box.
            types: Series name of each box. It selects the colour, the
                legend group and the offset group, so it must be a key of
                ``color_map``.
            title: Figure title.
        """
        fig = go.Figure()
    
        color_map = {"Rainy": "blue", "Sunny": "green"}
    
        # One legend entry per type, shown on the first box of that type
        # (instead of assuming the first two boxes have different types).
        types_in_legend = set()
        for stats, label, type_ in zip(summary_stats_list, labels, types):
            # 'Outliers' is optional; None means "no outlier trace for this box".
            outliers = stats.get('Outliers')
    
            # NOTE(review): no sample y-values are passed to go.Box, so the
            # boxpoints/jitter/pointpos options presumably have nothing to
            # draw -- confirm before relying on them.
            fig.add_trace(go.Box(
                name=type_,
                q1=[stats['Q1']],
                median=[stats['Median']],
                q3=[stats['Q3']],
                lowerfence=[stats['Min']],
                upperfence=[stats['Max']],
                mean=[stats['Mean']],
                boxpoints='all' if outliers is not None else False,
                jitter=0.3,
                pointpos=-1.8,
                marker=dict(color=color_map[type_]),  # Assign color based on type
                legendgroup=type_,
                showlegend=type_ not in types_in_legend,
                x=[label],
                # update: the offset group is derived from the type rather than
                # from a fixed-length list, so any number of boxes is handled.
                offsetgroup=type_,
            ))
            types_in_legend.add(type_)
    
            if outliers is not None:
                fig.add_trace(go.Scatter(
                    x=[label] * len(outliers),
                    y=outliers,
                    xaxis='x',
                    yaxis='y',
                    offsetgroup=type_,  # update: same offset group as the box
                    mode='markers',
                    marker=dict(color=color_map[type_], size=8, symbol='circle-open'),
                    name=f"Outliers - {type_}",
                    legendgroup=type_,
                    showlegend=False
                ))
    
        fig.update_layout(title=title, yaxis_title="Value", boxmode='group')
        fig.show()
    
    # Demo input: one precomputed summary per box, in plotting order.
    data_summaries = [
        dict(Min=5, Q1=10, Median=15, Q3=20, Max=25, Mean=16, Outliers=[2, 27]),
        dict(Min=6, Q1=11, Median=16, Q3=21, Max=26, Mean=17, Outliers=[3, 28]),
        dict(Min=4, Q1=9, Median=14, Q3=19, Max=24, Mean=15, Outliers=[1, 26]),
        dict(Min=7, Q1=12, Median=17, Q3=22, Max=27, Mean=18, Outliers=[4, 29]),
    ]
    # Box i is drawn at x category labels[i] and belongs to series types[i].
    labels = ["Happy", "Happy", "Sad", "Sad"]
    types = ["Rainy", "Sunny", "Rainy", "Sunny"]
    
    create_multiple_boxplots(
        summary_stats_list=data_summaries,
        labels=labels,
        types=types,
    )
    

    enter image description here