python pandas plotly scatter-plot plotly-express

Add another dataframe as annotation in plotly express

I visualized data with UMAP, but I cannot add proper annotations. How can I use another dataframe of the same length to add hover text to a plotly express scatter? As far as I understand, I can only specify a column from data_2d there. Can I get all the rows as annotations from another_df?

import plotly.express as px
def scatter(data_2d, labels, another_df):
    """Show a plotly express scatter of data_2d coloured by labels.

    This is the question's attempt: the goal is for each point's hover text
    to list the columns of the matching row of another_df, which the
    ``text=another_df`` argument below does not achieve.
    """
    # Pair labels with G10 palette colours so the same mapping can be reused
    # for other data.
    # NOTE(review): `labels.unique` is referenced here without being called;
    # presumably `labels.unique()` was intended - confirm.
    c = dict(zip(labels.unique, px.colors.qualitative.G10)) # make the same colors for another data
    fig = px.scatter(
        data_2d, x=0, y=1,
        color=labels,
        color_discrete_map=c,
        text=another_df, # here I'm stuck
        # expected annotation per point:
        #   column1: 57575874
        #   column2: 0.4545
        #   columnN: ....

        # hover_data expects labels from data_2d, so it doesn't work here
        # text is constant, I see just a mess of text
    )

    # Small, semi-transparent markers.
    fig.update_traces(marker=dict(size=5, opacity=0.7))

    fig.show()

Solution

Your sample code:
- does not define its data structures; I have inferred what they are from the comments in the code
- passing x and y as values is syntactically incorrect; I have fixed this by extracting them from the 2D numpy array returned by UMAP
Solution:
- you have stated that another_df is the same length as data_2d. One option is to use the dataframe of all columns before it is put through the transforms
- the labels parameter is really redundant, since it is part of another_df
- you want to define hover inputs, so I have defined hover_name and hover_data, using all columns from another_df

import plotly.express as px
import pandas as pd
import numpy as np
import umap.umap_ as umap
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def scatter(data_2d, labels, another_df, hover_name="island"):
    """Show a scatter of a 2D embedding with hover text taken from another_df.

    Args:
        data_2d: Array indexed as ``data_2d[:, 0]`` (x) and ``data_2d[:, 1]``
            (y); expected to have one row per row of another_df.
        labels: Object with a ``.unique()`` method (e.g. a pandas Series)
            giving the colour group of each point.
        another_df: DataFrame whose columns are all listed in the hover box.
        hover_name: Column of another_df used as the hover title. A column
            name that is not present in another_df is ignored instead of
            raising, so the function also works for frames without an
            "island" column. Non-string values are passed to plotly as given.
    """
    # Pair each distinct label with a G10 colour so other plots can reuse the
    # same mapping. zip stops at the shorter input, so labels beyond the
    # palette length are simply left out of the map.
    color_map = dict(zip(labels.unique(), px.colors.qualitative.G10))

    # Drop the hover title only when it names a column that does not exist;
    # the hover box still lists every column through hover_data.
    if isinstance(hover_name, str) and hover_name not in another_df.columns:
        hover_name = None

    fig = px.scatter(
        another_df,
        x=data_2d[:, 0],
        y=data_2d[:, 1],
        color=labels,
        color_discrete_map=color_map,
        hover_name=hover_name,
        hover_data=list(another_df.columns),
    )
    fig.update_traces(marker=dict(size=5, opacity=0.7))
    fig.show()

    
# Load the penguins table; the URL includes a commit hash of the source repo.
penguins = pd.read_csv(
    "https://github.com/allisonhorst/palmerpenguins/raw/5b5891f01b52ae26ad8cb9755ec93672f49328a8/data/penguins_size.csv"
)

# Keep the four measurement columns and drop rows with any missing value.
measurement_columns = [
    "culmen_length_mm",
    "culmen_depth_mm",
    "flipper_length_mm",
    "body_mass_g",
]
data = penguins.loc[:, measurement_columns].dropna()
scaled_penguin_data = StandardScaler().fit_transform(data.values)

# Reduce the scaled measurements with UMAP's default settings.
reducer = umap.UMAP()
embedding = reducer.fit_transform(scaled_penguin_data)

# Select by data.index so labels and hover rows cover exactly the rows that
# survived dropna() and therefore line up with the embedding.
sex_labels = penguins.loc[data.index, "sex"].fillna("UNKNOWN")
hover_rows = penguins.loc[data.index]
scatter(embedding, sex_labels, hover_rows)