Search code examples
python, plotly, boxplot, plotly-python

Can I calculate the p-value and add asterisk using plotly?


For example, we have the tips dataset with the columns day, total_bill and sex. I want to draw boxplots (x=day, y=total_bill, color=sex). After that, for each day, I want to run a statistical test between the female and male participants and compute its p-value. If the p-value is < 0.05, I want to add an asterisk. How could I change the code below to do this?

The example below compares different days with each other, without splitting by sex:

from scipy import stats
import plotly.express as px
import plotly.graph_objects as go

# Load the tips dataset (columns used below: day, total_bill).
tips = px.data.tips()

# Start from an empty figure and append one box trace per day, in this order.
fig = go.Figure()
for day in ['Thur','Fri','Sat','Sun']:
    fig.add_trace(
        go.Box(
            name=day,
            boxpoints='outliers',
            y=tips.loc[tips['day'] == day, 'total_bill'],
        )
    )

def add_pvalue_annotation(days, y_range, symbol=''):
    """
    Draw a bracket between two day boxes on the module-level `fig` and label it.

    The total_bill values of the two days (taken from the module-level `tips`)
    are compared with stats.ttest_ind. The bracket is labelled '*' when the
    p-value is below 0.05 and 'ns' when it is 0.05 or above.

    arguments:
    days --- a list of two different days e.g. ['Thur','Sat']
    y_range --- a list of y_range in the form [y_min, y_max] in paper units
    symbol --- label that is kept only when the p-value satisfies neither
               comparison (e.g. NaN); otherwise it is replaced by '*' or 'ns'
    """
    left_day, right_day = days[0], days[1]
    y_low, y_high = y_range[0], y_range[1]

    left_bills = tips.loc[tips['day'] == left_day, 'total_bill']
    right_bills = tips.loc[tips['day'] == right_day, 'total_bill']
    test_result = stats.ttest_ind(left_bills, right_bills)
    pvalue = test_result[1]

    if pvalue < 0.05:
        symbol = '*'
    elif pvalue >= 0.05:
        symbol = 'ns'

    # The bracket is three straight segments: left leg, top bar, right leg.
    bracket_segments = [
        (left_day, y_low, left_day, y_high),
        (left_day, y_high, right_day, y_high),
        (right_day, y_high, right_day, y_low),
    ]
    for seg_x0, seg_y0, seg_x1, seg_y1 in bracket_segments:
        fig.add_shape(
            type="line",
            xref="x", yref="paper",
            x0=seg_x0, y0=seg_y0, x1=seg_x1, y1=seg_y1,
            line={"color": "black", "width": 2},
        )

    # The label's x position is numeric: each day maps to its index 0, 1, 2...
    # and the label is centred between the two days.
    day_positions = {
        name: position
        for position, name in enumerate(['Thur','Fri','Sat','Sun'])
    }
    label_x = (day_positions[left_day] + day_positions[right_day])/2
    fig.add_annotation({
        "font": {"color": "black", "size": 14},
        "x": label_x,
        "y": y_high*1.03,
        "showarrow": False,
        "text": symbol,
        "textangle": 0,
        "xref": "x",
        "yref": "paper",
    })

# Two comparisons against Thursday; the second bracket uses a higher
# y_range (paper units) than the first.
add_pvalue_annotation(days=['Thur','Sun'], y_range=[1.01,1.02])
add_pvalue_annotation(days=['Thur','Sat'], y_range=[1.05,1.06])

fig.show()

my_plot

I found this useful example here: Plotly box p-value significant annotation


Solution

  • When you are setting up the boxplots, using px.box from plotly.express will be useful since you can pass the argument color="sex" which will create two boxplots for each gender for every day. You'll also want to sort the tips DataFrame so that the days of the week are plotted in order.

    Then the add_pvalue_annotation function can be modified so that it calculates the p-value of the t-test between men and women within each day (instead of the t-test between tips for different days of the week). You'll also want to change the starting and ending points of the annotations so that they span the Male and Female boxes within the same day instead of spanning different days.

    For the tips dataset, I ran t-tests between the men and women within each day of the week (e.g. men and women on Thur, men and women on Fri...), and none of the p-values are below 0.05.

    However, to demonstrate that the add_pvalue_annotation function places annotations correctly, I set the p-value threshold to 0.15 so that the comparison between men and women on Friday (p-value = 0.13) is annotated on the chart.

    from scipy import stats
    import plotly.express as px
    import plotly.graph_objects as go
    from pandas.api.types import CategoricalDtype
    
    # Display order for the x axis; as an ordered categorical dtype it makes
    # sorting by 'day' follow this order.
    cat_order = ['Thur', 'Fri', 'Sat', 'Sun']
    cat_weekdays = CategoricalDtype(categories=cat_order, ordered=True)
    
    tips = px.data.tips()
    tips['day'] = tips['day'].astype(cat_weekdays)
    tips = tips.sort_values(by='day')
    
    # color="sex" gives one box per sex within each day.
    fig = px.box(tips, x="day", y="total_bill", color="sex")
    
    def add_pvalue_annotation(day, y_range, symbol='', pvalue_th=0.05):
        """
        Mark one day's Male-vs-Female comparison on `fig` when it is significant.

        Runs stats.ttest_ind on total_bill between the 'Male' and 'Female' rows
        of the module-level `tips` for the given day. If the p-value is below
        `pvalue_th`, a bracket and a text label are added to the module-level
        `fig` above that day's boxes; otherwise the figure is left untouched.

        arguments:
        day --- the day for which to run the t-test between Men and Women
                (e.g. 'Thur'); must be one of the entries of `cat_order`
        y_range --- a list of y_range in the form [y_min, y_max] in paper units
        symbol --- text drawn above the bracket; an empty value (the default)
                   falls back to '*'
        pvalue_th --- significance threshold; the annotation is drawn only
                      when the p-value is strictly below it
        """
        day_rows = tips['day'] == day
        pvalue = stats.ttest_ind(
            tips[day_rows & (tips['sex'] == 'Male')].total_bill,
            tips[day_rows & (tips['sex'] == 'Female')].total_bill
        )[1]
    
        # Nothing to draw for a non-significant result. A NaN p-value compares
        # False against the threshold, so it is skipped here as well.
        if not pvalue < pvalue_th:
            return
    
        # Previously the argument was overwritten unconditionally; honour a
        # caller-supplied label and keep '*' as the default.
        label = symbol or '*'
    
        ## for bars, there is a direct mapping from the bar number to 0, 1, 2...
        bar_xcoord_map = {x: idx for idx, x in enumerate(cat_order)}
        x_coordinate = bar_xcoord_map[day]
        # The bracket spans 0.2 axis units either side of the day's centre,
        # intended to reach from one sex's box to the other within that day.
        x_start, x_end = x_coordinate - 0.2, x_coordinate + 0.2
        y_low, y_high = y_range[0], y_range[1]
    
        # The bracket is three straight segments: left leg, top bar, right leg.
        segments = [
            (x_start, y_low, x_start, y_high),
            (x_start, y_high, x_end, y_high),
            (x_end, y_high, x_end, y_low),
        ]
        for x0, y0, x1, y1 in segments:
            fig.add_shape(type="line",
                xref="x", yref="paper",
                x0=x0, y0=y0, x1=x1, y1=y1,
                line=dict(
                    color="black",
                    width=2,
                )
            )
    
        ## add text at the correct x, y coordinates
        fig.add_annotation(dict(font=dict(color="black",size=14),
            x=x_coordinate,
            y=y_high*1.03,
            showarrow=False,
            text=label,
            textangle=0,
            xref="x",
            yref="paper"
        ))
    
    # Test every day. The threshold is raised from the default to 0.15 purely
    # to demonstrate the annotation.
    for day in cat_order:
        add_pvalue_annotation(day=day, y_range=[1.01,1.02], pvalue_th=0.15)
    
    fig.show()
    

    enter image description here