How can I change the position of the asterisk in the heatmap? I want the asterisk to sit near the top-right corner of the cell that contains the number, rather than directly next to or above the number. Placing it just inside that corner (close to the corner, but not above or outside it) would improve readability. Can this be done?
from string import ascii_letters
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
# Script: draw a lower-triangle correlation heatmap whose cell labels are the
# Pearson r values, with a trailing "*" wherever the p-value is below 0.05.
sns.set_theme(style="white")

# Reproducible synthetic data: 100 rows of standard-normal noise in 26 columns
# named "A".."Z" (ascii_letters[26:] is the uppercase half of the alphabet).
rs = np.random.RandomState(33)
df = pd.DataFrame(data=rs.normal(size=(100, 26)),
                  columns=list(ascii_letters[26:]))

# Initialize an empty DataFrame for annotations (one text label per cell)
annotations = pd.DataFrame(index=df.columns, columns=df.columns)

# Recompute the correlation matrix and generate annotations including p-values
correlations = df.corr()
for col1 in df.columns:
    for col2 in df.columns:
        if col1 != col2:  # Avoid calculating pearsonr for identical columns
            corr, p = pearsonr(df[col1], df[col2])
            annotation = f"{corr:.2f}"
            if p < 0.05:
                # Significance marker is appended to the label text itself, so
                # it is rendered right after the number in the cell.
                annotation += "*"
            annotations.loc[col1, col2] = annotation
        else:
            # Diagonal elements are self-correlations
            annotations.loc[col1, col2] = f"{1:.2f}"

# Custom colormap
cmap = sns.diverging_palette(250, 10, as_cmap=True)

# Mask for the upper triangle; np.triu with the default offset also marks the
# diagonal, so the diagonal cells are hidden as well.
mask = np.triu(np.ones_like(correlations, dtype=bool))

plt.figure(figsize=(20, 15))
# fmt='' is required because the annotations are already formatted strings.
sns.heatmap(correlations, mask=mask, cmap=cmap, vmax=1, center=0, vmin=-1,
            square=True, linewidths=.5, cbar_kws={"shrink": .5},
            annot=annotations, fmt='')
plt.title('Correlation Matrix')  # fixed typo: was 'Coraletion Matrix'
plt.show()
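You can write the asterisk directly at the desired location with `ax.text`. In a seaborn heatmap the cell in row `i`, column `j` spans `x` from `j` to `j + 1` and `y` from `i` to `i + 1` (with the y-axis inverted, so `y = i` is the top edge), so anchoring right/top-aligned text at `(j + 0.95, i + 0.05)` places it just inside the top-right corner: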
from itertools import product
from string import ascii_uppercase
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import pearsonr
# Script: plot the lower triangle of a Pearson correlation matrix and mark
# every cell whose p-value is below 0.05 with a red asterisk drawn just inside
# the cell's top-right corner.

# Reproducible synthetic data: 100 samples of N standard-normal variables.
rs = np.random.RandomState(33)
N = 10
columns = list(ascii_uppercase[:N])
df = pd.DataFrame(data=rs.normal(size=(100, N)), columns=columns)

# Start from all-NaN frames; only the strict lower triangle gets filled in
# below, and the cells left as NaN get no asterisk (NaN < 0.05 is False).
correlations = pd.DataFrame(np.nan, index=columns, columns=columns)
p_values = pd.DataFrame(np.nan, index=columns, columns=columns)

# The matrix is symmetric with a unit diagonal, so only pairs strictly below
# the diagonal are computed.
for row_pos, col_pos in product(range(N), repeat=2):
    if row_pos <= col_pos:
        continue
    row_name = columns[row_pos]
    col_name = columns[col_pos]
    r_value, p_value = pearsonr(df[col_name], df[row_name])
    correlations.loc[row_name, col_name] = r_value
    p_values.loc[row_name, col_name] = p_value

# Figure
sns.set_theme(style="white")
cmap = sns.diverging_palette(250, 10, as_cmap=True)
fig, ax = plt.subplots(figsize=(10, 8))
heatmap_options = {
    "cmap": cmap,
    "vmax": 1,
    "center": 0,
    "vmin": -1,
    "square": True,
    "linewidths": 0.5,
    "cbar_kws": {"shrink": 0.5},
    "annot": True,
    "fmt": ".2f",
    "annot_kws": {"fontsize": 9},
    "ax": ax,
}
sns.heatmap(correlations, **heatmap_options)

# Add an asterisk where the p-value is less than 0.05. The text is anchored by
# its top-right corner at (column + 0.95, row + 0.05) in data coordinates.
asterisk_style = {
    "ha": "right",
    "va": "top",
    "color": "red",
    "fontsize": 20,
    "fontweight": "bold",
}
for (row_pos, col_pos), p_value in np.ndenumerate(p_values.to_numpy()):
    if p_value < 0.05:
        ax.text(col_pos + 0.95, row_pos + 0.05, "*", **asterisk_style)

ax.set_title("Correlation Matrix")
plt.show()