Search code examples
python matplotlib seaborn visualization

How to relocate the significance asterisk in a seaborn heatmap


How can I change the position of the asterisk in the heatmap? I want the asterisk to sit near the top-right corner of the square that contains the number, rather than directly after the number. Placing it just inside that corner, rather than above or outside it, should improve readability. Can it be done?

from string import ascii_letters
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

# Select seaborn's "white" style as the plotting theme for this script.
sns.set_theme(style="white")
# Synthetic data set: 100 normally distributed samples for each of 26 columns
# named "A".."Z" (ascii_letters[26:] is the upper-case half of the alphabet).
# The generator is seeded so the figure is reproducible.
rs = np.random.RandomState(33)
df = pd.DataFrame(data=rs.normal(size=(100, 26)),
                  columns=list(ascii_letters[26:]))

# Cell labels for the heatmap: one string per (row, column) pair.
annotations = pd.DataFrame(index=df.columns, columns=df.columns)

# Correlation coefficients that the heatmap colour-maps.
correlations = df.corr()

# Pearson's r and its p-value do not depend on the order of the two inputs,
# so each unordered pair is tested once and the label is mirrored across the
# diagonal instead of calling pearsonr twice per pair.
labels = list(df.columns)
for i, col1 in enumerate(labels):
    # Diagonal elements are self-correlations; no test is needed.
    annotations.loc[col1, col1] = f"{1:.2f}"
    for col2 in labels[i + 1:]:
        corr, p = pearsonr(df[col1], df[col2])
        annotation = f"{corr:.2f}"
        if p < 0.05:
            # Mark correlations that are significant at the 5 % level.
            annotation += "*"
        annotations.loc[col1, col2] = annotation
        annotations.loc[col2, col1] = annotation

# Custom diverging colormap built by seaborn from the two hue values 250 and 10.
cmap = sns.diverging_palette(250, 10, as_cmap=True)

# Mask for the upper triangle. np.triu with its default offset keeps the main
# diagonal as well, so only cells strictly below the diagonal are drawn.
mask = np.triu(np.ones_like(correlations, dtype=bool))

plt.figure(figsize=(20, 15))
# annot=annotations supplies the pre-formatted label strings ("0.12", "0.34*");
# fmt='' keeps seaborn from applying a numeric format to those strings.
sns.heatmap(correlations, mask=mask, cmap=cmap, vmax=1, center=0, vmin=-1,
            square=True, linewidths=.5, cbar_kws={"shrink": .5}, annot=annotations, fmt='')

# Title spelling corrected (was 'Coraletion Matrix').
plt.title('Correlation Matrix')
plt.show()

Solution

  • Let seaborn annotate only the numbers, then draw each asterisk yourself with `ax.text` at the position you want inside the cell:

    from itertools import product
    from string import ascii_uppercase
    
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from scipy.stats import pearsonr
    
    # Seeded generator so the example data (and the figure) are reproducible.
    rs = np.random.RandomState(33)

    # Number of variables; the columns are named with the first N capital letters.
    N = 10
    columns = list(ascii_uppercase[:N])

    # 100 normally distributed samples per column.
    df = pd.DataFrame(data=rs.normal(size=(100, N)), columns=columns)

    # NaN-filled N x N frames for the correlation coefficients and their
    # p-values. Cells that are never assigned below stay NaN; seaborn's heatmap
    # masks missing values, so those cells are left blank in the figure.
    correlations = pd.DataFrame(np.full((N, N), np.nan), index=columns, columns=columns)
    p_values = pd.DataFrame(np.full((N, N), np.nan), index=columns, columns=columns)

    # The matrix is symmetric and its diagonal is 1, so only the cells strictly
    # below the diagonal (row col2, column col1, with col2 after col1) are filled.
    for i1, col1 in enumerate(columns):
        for col2 in columns[i1 + 1 :]:
            corr, p = pearsonr(df[col1], df[col2])
            correlations.loc[col2, col1] = corr
            p_values.loc[col2, col1] = p
    
    # Figure set-up: white theme, diverging colormap, one axes to draw on.
    sns.set_theme(style="white")
    cmap = sns.diverging_palette(250, 10, as_cmap=True)
    fig, ax = plt.subplots(figsize=(10, 8))

    # Heatmap options. annot=True with fmt=".2f" makes seaborn print only the
    # two-decimal coefficient in each cell; the asterisks are added separately.
    heatmap_options = {
        "cmap": cmap,
        "vmax": 1,
        "center": 0,
        "vmin": -1,
        "square": True,
        "linewidths": 0.5,
        "cbar_kws": {"shrink": 0.5},
        "annot": True,
        "fmt": ".2f",
        "annot_kws": {"fontsize": 9},
        "ax": ax,
    }
    sns.heatmap(correlations, **heatmap_options)

    # Appearance of the significance marker. Anchoring the text by its top-right
    # corner keeps the glyph inside the cell.
    star_style = {
        "ha": "right",
        "va": "top",
        "color": "red",
        "fontsize": 20,
        "fontweight": "bold",
    }

    # Put an asterisk in every cell whose p-value is below 0.05. Cell (row, col)
    # is addressed in data coordinates, so (col + 0.95, row + 0.05) is a point
    # slightly inset from the cell's top-right corner.
    n_rows, n_cols = correlations.shape
    for row, col in product(range(n_rows), range(n_cols)):
        # NaN p-values (cells that were never filled) compare False and are skipped.
        if not (p_values.iloc[row, col] < 0.05):
            continue
        ax.text(col + 0.95, row + 0.05, "*", **star_style)

    ax.set_title("Correlation Matrix")
    plt.show()
    

    correlation matrix