My data consists of several nested categories. For each category I am able to generate a stacked density plot such as the one illustrated here.
I have several such density plots, each with data in the x domain from 0 to 100. For each stacked density plot I would like a single ridge plot. The end result would be a plot of ridge plots in which each row is a single stacked density plot. Is this possible?
Because each ridge in a ridge plot partially obscures the one behind it, I think the area under the curves of the stacked density plots may be misinterpreted by the observer, since some section of a curve may be hidden by the next ridge. Hence I would like to drop the idea of having a stacked density plot in each ridge. Instead, I would like to plot each variable as its own ridge, this time including lines for the mean and the standard deviation, with the area under the curve between the two standard-deviation lines shaded.
As requested (by JohanC), below is the code I would like assistance with. Somehow I am unable to get rid of the "Density" label on the y-axis.
# seaborn ridge plots with penguins dataset
import logging;
import os;

import matplotlib.pyplot as plt;
import numpy as np;
import pandas;
import pandas as pd;
#!pip install seaborn;
import seaborn as sns;
# Log line layout: level, timestamp, calling function and line number, message.
LOG_FORMAT = "%(levelname) -5s time:%(asctime)s [%(funcName) -5s %(lineno) -5d]: %(message)s"
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

# Module logger, also exposed under the name the plotting code receives.
LOGGER = logging.getLogger(__name__)
logger_obj: logging.Logger = LOGGER

# Example dataset used for the ridge plots.
my_df = sns.load_dataset("penguins")

# White theme; the axes face colour is RGBA white with alpha 1 (opaque).
sns.set_theme(style="white", rc={"axes.facecolor": (1, 1, 1, 1)})
import errno;
def mkdir_p(path):
    """Create directory *path*, including missing parents (like ``mkdir -p``).

    Does nothing when the directory already exists.

    Args:
        path: Directory path to create.

    Raises:
        OSError: If the directory cannot be created, e.g. *path* exists but
            is a regular file.
    """
    # exist_ok=True already tolerates an existing directory, so neither a
    # prior existence check nor an EEXIST handler is needed.
    os.makedirs(path, exist_ok=True)
def generate_plot(
    logger_obj: logging.Logger,
    my_df: pandas.DataFrame,
    sample_size: int,
    axs2,
):
    """Draw a ridge plot of ``flipper_length_mm`` per species and save it as PNG.

    Every species is randomly sampled down to the same number of rows
    (``sample_size``, capped at the smallest species count). One filled KDE
    ridge is then drawn per species and the figure is written to
    ``images/penguins.ridge_plot/``.

    Args:
        logger_obj: Logger that receives per-species counts and the output path.
        my_df: Frame with at least ``species`` and ``flipper_length_mm`` columns.
        sample_size: Requested number of rows to sample per species.
        axs2: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If ``my_df`` contains no species.
    """
    species_list = list(my_df["species"].unique())
    if not species_list:
        raise ValueError("my_df contains no species to plot")

    # Cap the sample size at the smallest species so all ridges are built
    # from the same number of rows.
    rows_per_species = sample_size
    for species in species_list:
        subset = my_df[my_df["species"] == species]
        logger_obj.info(
            "species is :'%s', count is:%s, flipper_length_mm_sum is:%s",
            species, len(subset), subset["flipper_length_mm"].sum(),
        )
        rows_per_species = min(rows_per_species, len(subset))

    samples = []
    for species in species_list:
        sampled = my_df[my_df["species"] == species].sample(rows_per_species)
        logger_obj.info(
            "species is :'%s', count is:%s, flipper_length_mm_sum is:%s",
            species, len(sampled), sampled["flipper_length_mm"].sum(),
        )
        samples.append(sampled)
    plot_df = pd.concat(samples, ignore_index=True)

    # Transparent axes background so overlapping ridges do not hide each
    # other; must be set before the grid (and its axes) is created.
    sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0), "axes.linewidth": 2})
    palette = sns.color_palette("Set2", 12)
    g = sns.FacetGrid(
        data=plot_df, palette=palette, row="species", hue="species",
        aspect=9, height=1.2,
    )
    # Filled density first, then a white outline to separate the ridges.
    g.map_dataframe(sns.kdeplot, x="flipper_length_mm", fill=True, alpha=1)
    g.map_dataframe(sns.kdeplot, x="flipper_length_mm", color="white")

    def label_f(x, color, label):
        """Write the species name at the left edge of the current ridge."""
        ax2 = plt.gca()
        ax2.text(0, .2, label, color="black", fontsize=13,
                 ha="left", va="center", transform=ax2.transAxes)

    g.map(label_f, "species")

    # Negative spacing makes consecutive rows overlap (the "ridge" look).
    g.fig.subplots_adjust(hspace=-.5)
    g.set_titles(col_template="", row_template="")
    # ylabel="" removes the "Density" label that kdeplot puts on each row.
    g.set(yticks=[], ylabel="", xlabel="flipper_length_mm")
    g.despine(left=True)

    image_png_fn = "images/penguins.ridge_plot/sample_day_feature.flipper_length_mm.all_species.png"
    logger_obj.info("image_png_fn is :'%s'", image_png_fn)
    mkdir_p(os.path.dirname(os.path.abspath(image_png_fn)))
    # Save the grid's own figure rather than whatever figure is current.
    g.fig.savefig(image_png_fn)
# Requested rows per species; generate_plot caps it at the smallest species.
sample_size: int = 30000
generate_plot(logger_obj, my_df, sample_size, None)
Here is what a ridge plot for the penguins dataset could look like, with the mean and a region for the standard deviation added per subplot. A lot of tweaking might be needed for your specific situation. (To remove the y-labels, you can use g.set(..., ylabel='').)
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
# Ridge plot of flipper length per penguin species, with the mean marked and
# the band within one standard deviation of the mean shaded darker.
penguins = sns.load_dataset('penguins')
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0), 'axes.linewidth': 2})
palette = sns.color_palette("Set2", 12)
g = sns.FacetGrid(
    data=penguins, palette=palette, row="species", hue="species",
    aspect=9, height=1.2,
)

for (species, ax), color in zip(g.axes_dict.items(), palette):
    # Values shown in this row's subplot.
    lengths = penguins.loc[penguins['species'] == species, 'flipper_length_mm'].values

    # Draw the KDE outline, then read the curve back from the axes so the
    # fills below follow exactly the same line.
    sns.kdeplot(x=lengths, color='white', fill=False, ax=ax)
    curve = ax.lines[0]
    curve_x = curve.get_xdata()
    curve_y = curve.get_ydata()

    center = np.nanmean(lengths)
    spread = np.nanstd(lengths)

    # Dotted vertical line at the mean, reaching up to the curve.
    curve_at_center = np.interp(center, curve_x, curve_y)
    ax.vlines(center, 0, curve_at_center, color='black', ls=':')

    # Whole area under the curve in the species colour.
    ax.fill_between(curve_x, 0, curve_y, facecolor=color, alpha=1)

    # Darker overlay for the part within one standard deviation of the mean.
    lower, upper = center - spread, center + spread
    within_one_sd = (curve_x >= lower) & (curve_x <= upper)
    ax.fill_between(curve_x[within_one_sd], 0, curve_y[within_one_sd], facecolor='black', alpha=0.06)

    # Species name at the bottom-left of the subplot.
    ax.text(0, .05, species, color="black", fontsize=13, ha="left", va="bottom", transform=ax.transAxes)

# Negative spacing overlaps the rows; extra bottom margin leaves room for the x label.
g.fig.subplots_adjust(hspace=-.5, bottom=.15)
g.despine(left=True)
g.set(title='', ylabel='', yticks=[], xlabel="flipper length (mm)")
plt.show()