I am trying to visualise a series of points on top of a Gantt chart. The result would look like this using sns.stripplot and plt.barh
My issue is that so far I have only been able to produce each figure separately and overlay them manually. When I tried to combine the plots within the same figure, the scatter plot was not aligned with the corresponding step (bar), as you can see:
Here is the code to create the two dataframes ("df" is used for the scatter plot and "df_gantt" for the Gantt chart):
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
pd.set_option('display.max_colwidth', None)
# Shape of the sample data: rows are observations, columns are steps
num_rows = 10
num_cols = 3
# Fix the seed of the global NumPy RNG so the sample is reproducible
np.random.seed(12345)
# One array of uniform draws between 0 and 5 per "Step i" column,
# generated in step order (the draw order matters for reproducibility)
step_labels = [f"Step {i}" for i in range(1, num_cols + 1)]
data = {label: np.random.uniform(low=0, high=5, size=num_rows) for label in step_labels}
# Assemble the columns into the scatter DataFrame
df = pd.DataFrame(data)
# Create the Gantt DataFrame: one row per step, where each step starts at
# the end of the previous one and lasts a random number of days.
# Rows are collected in a list and turned into a DataFrame once, because
# DataFrame.append was removed in pandas 2.0.
num_steps = num_cols
gantt_rows = []
for i in range(1, num_steps + 1):
    # The first step starts at 0; later steps start where the previous one ended
    start = gantt_rows[-1]['End'] if gantt_rows else 0
    duration = np.random.randint(low=5, high=21)
    end = start + duration
    gantt_rows.append({'Step': i, 'Start': start, 'End': end, 'Duration': duration})
df_gantt = pd.DataFrame(gantt_rows, columns=['Step', 'Start', 'End', 'Duration'])
# Rename the columns to the names used by the plotting code
df_gantt.columns = ['Milestone', 'start_num', 'end_num', 'days_start_to_end']
df_gantt.reset_index(inplace=True, drop=True)
# Running total of the step durations
df_gantt['days_start_to_end_cum'] = df_gantt['days_start_to_end'].cumsum()
df_gantt
# Shift every step's points by that step's cumulative duration.
# Columns of df are matched to rows of df_gantt by position.
cumulative_days = df_gantt['days_start_to_end_cum']
for position, step_col in enumerate(df.columns):
    # Add the cumulative duration of the matching Gantt row to the whole column
    df[step_col] += cumulative_days.iloc[position]
df
And here is the code used to create the second image with my scatter plots not aligned with the gantt:
# Create the figure and the axis that holds the Gantt bars
fig, ax1 = plt.subplots(figsize=(16,8))
# Plot the first graph on the first axis: one horizontal bar per milestone,
# starting at start_num and spanning days_start_to_end
ax1.barh(
    df_gantt.Milestone,
    df_gantt.days_start_to_end,
    left=df_gantt.start_num,
    color="#04AA6D",
    edgecolor="Black",
    zorder=2,
)
# Dotted vertical guide at the end of every step (zorder below the bars)
for step_end in df_gantt.end_num.unique():
    ax1.axvline(x=step_end, color='black', ls=':', lw=1.5, zorder=1)
# Flip the y axis
ax1.invert_yaxis()
# Hide the frame and both axes
for side in ('top', 'right', 'bottom', 'left'):
    ax1.spines[side].set_visible(False)
ax1.get_xaxis().set_visible(False)
ax1.get_yaxis().set_visible(False)
# Faint reference line at x = 0
ax1.axvline(x=0, color='grey', ls=':', lw=0.3)
# Create a second axis that shares the same x-axis with the first axis
ax2 = ax1.twinx()
# Styling shared by every strip plot drawn on the second axis
strip_style = dict(
    data=df,
    color='grey',
    edgecolor='black',
    linewidth=1,
    alpha=0.4,
    dodge=True,
    zorder=1,
)
# Plot the second graph on the second axis: one strip plot and one mean
# marker per column of df
for step_col in df.columns:
    sns.stripplot(x=df[step_col], ax=ax2, **strip_style)
    # White triangle at the column mean; every marker is placed at y=0
    ax2.scatter(x=df[step_col].mean(), y=0, zorder=2, marker="^", s=310, color="white", edgecolor='black', linewidth=2.5)
# Faint reference line at x = 0
ax2.axvline(x=0, color='grey', ls=':', lw=0.3)
# Hide the frame and both axes of the second axis
for side in ('top', 'right', 'bottom', 'left'):
    ax2.spines[side].set_visible(False)
ax2.get_xaxis().set_visible(False)
ax2.get_yaxis().set_visible(False)
plt.xlim([-5, 70])
plt.show()
I don't think you need 2 axes to reproduce the desired figure.
Here is a working example:
# From here ...
fig, ax1 = plt.subplots(figsize=(16,8))
ax1.barh(df_gantt.Milestone, df_gantt.days_start_to_end, left=df_gantt.start_num, color= "#04AA6D", edgecolor = "Black",zorder=2)
for i in df_gantt.end_num.unique():
    ax1.axvline(x=i, color='black', ls=':', lw=1.5,zorder=1)
ax1.axvline(x=0, color='grey', ls=':', lw=0.3)
# ... to here you had it
# Simply create the triangle scatter and stripplot on the same axis.
# orient="h" and native_scale=True are the important parameters for the desired look
stripplot_kwargs = dict(orient="h", native_scale=True, color='grey', edgecolor='black', linewidth=1, alpha=0.4)
# n starts at 1 so that each column is drawn at y = 1, 2, ...;
# this assumes df_gantt.Milestone is numbered 1..N in the same order as df's columns
for n, colname in enumerate(df.columns, 1):
    serie = df[colname]
    # Pass ax=ax1 explicitly so the points land on the Gantt axis even if
    # another figure or axis has become the current one
    sns.stripplot(x=serie, y=n, ax=ax1, **stripplot_kwargs)
    # White triangle marking the mean of the step, on the same row as its points
    ax1.scatter(x=serie.mean(), y=n, zorder=2, marker="^", s=310, color="white", edgecolor='black', linewidth=2.5)
# Invert the axis and remove axis frame
ax1.invert_yaxis()
ax1.axis("off")