Search code examples
pythonseaborn

plot density ridge plot with conditional fill color in python


I am trying to produce density ridge plots in python where the fill is conditional on the x-values as has been done in this post: plot continuous geom_density_ridges with conditional fill color

enter image description here

My current Python code draws a separate density curve for each fill color, whereas I want a single continuous density curve whose fill color changes at x = 0.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Synthetic example data: three recipes (columns 'A', 'B', 'C'), each holding
# 4000 normally distributed draws with standard deviation 0.25 and means
# 0, 0.2 and 0.4. Needs numpy imported as `np`.
data = pd.DataFrame({
    'A': np.random.normal(0, 0.25, 4000),
    'B': np.random.normal(0.2, 0.25, 4000),
    'C': np.random.normal(0.4, 0.25, 4000),
})

# Reshape from wide to long format: one row per draw, with the recipe name in
# column 'recipe' and the sampled number in column 'values'.
data_melted = pd.melt(data, var_name='recipe', value_name='values')
# Preview the first rows (only displayed in a notebook / interactive session)
data_melted.head()

enter image description here

# Draw a separate figure for every recipe
for recipe in data_melted['recipe'].unique():
    # Values that belong to the recipe being plotted
    values = data_melted.loc[data_melted['recipe'] == recipe, 'values']
    # Filled density plot; hue groups the values by sign (> 0 versus <= 0)
    ax = sns.kdeplot(x=values, hue=values > 0, palette=['red', 'blue'],
                     fill=True, alpha=0.5, linewidth=0.5)
    # Title and axis labels
    ax.set_title(f"Ridge Plot for Recipe {recipe}")
    ax.set_xlabel('Values')
    ax.set_ylabel('Density')
    # Show the figure
    plt.show()

enter image description here


Solution

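  • A KDE curve is a sum of small Gaussians. As such, it never ends in a straight edge, even where the data stop abruptly. Passing hue=values > 0 splits the data into two groups and draws a separate KDE for each, so you get two smooth, overlapping curves instead of one curve with a sharp color change at 0.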

    To get a straight boundary, you could draw the KDE curve once for all the data via sns.kdeplot(..., fill=False), then extract its coordinates and use them in separate calls to ax.fill_between(), one per color.

    Here follows some test code. Note that a nice ridgeplot needs a lot of fine-tuning, which heavily depends on your concrete situation.

    import matplotlib.pyplot as plt
    import seaborn as sns
    import pandas as pd
    import numpy as np
    
    # Synthetic data: three recipes, 4000 normal draws each, with shifted means
    data = pd.DataFrame({'A': np.random.normal(0, 0.25, 4000),
                         'B': np.random.normal(0.2, 0.25, 4000),
                         'C': np.random.normal(0.4, 0.25, 4000)})
    # Long format: one row per draw, columns 'recipe' and 'values'
    data_melted = pd.melt(data, var_name='recipe', value_name='values')

    # x-value at which the fill color changes
    threshold = 0

    recipes = data_melted['recipe'].unique()
    # squeeze=False always returns a 2D array of axes, so the loop below also
    # works when there is only a single recipe
    fig, axs = plt.subplots(nrows=len(recipes), sharex=True, sharey=True, squeeze=False)
    # Create a ridge plot for each recipe
    for ax, recipe in zip(axs.ravel(), recipes):
        # Filter data for the current recipe
        recipe_data = data_melted[data_melted['recipe'] == recipe]
        # Create a density plot, using a curve, not yet filled
        sns.kdeplot(x=recipe_data['values'], fill=False, ax=ax)
        # Extract the coordinates of the curve
        curve = ax.get_lines()[0]
        kde_x, kde_y = map(np.asarray, curve.get_data())
        # remove the curve, as we only need the coordinates
        curve.remove()
        if kde_x[0] < threshold < kde_x[-1]:
            # The threshold lies inside the curve: insert a point exactly at the
            # threshold (height linearly interpolated) so that both fills meet
            # there instead of at the nearest grid point
            split = np.searchsorted(kde_x, threshold)
            kde_y = np.insert(kde_y, split, np.interp(threshold, kde_x, kde_y))
            kde_x = np.insert(kde_x, split, threshold)
        else:
            # The threshold lies outside the curve: the whole curve gets one color
            split = 0 if threshold <= kde_x[0] else len(kde_x) - 1
        # color the part below the threshold
        ax.fill_between(kde_x[:split + 1], kde_y[:split + 1], color='crimson', alpha=0.5, linewidth=0.5)
        # color the part above the threshold
        ax.fill_between(kde_x[split:], kde_y[split:], color='skyblue', alpha=0.5, linewidth=0.5)
        # show the name of the recipe
        ax.text(0.02, 0.02, f"Recipe {recipe}", horizontalalignment='left', verticalalignment='bottom',
                transform=ax.transAxes)
        # let the curve "sit" on the x-axis
        ax.set_ylim(ymin=0)
    sns.despine()
    plt.show()
    

    sns.kdeplot with two colors