I visualized data with UMAP, but I cannot add proper annotations. How can I use another dataframe of the same length to add hover text to a plotly express scatter?
As far as I understand, I can only specify a column from `data_2d` there. Can I get all the rows as annotations from `another_df`?
import plotly.express as px
def scatter(data_2d, labels, another_df):
    """Show a plotly express scatter of a 2-D embedding, coloured by label.

    Args:
        data_2d: Two-column data; column 0 is plotted on x, column 1 on y.
        labels: Per-point labels exposing ``.unique()`` (e.g. a pandas
            Series); used for the point colours.
        another_df: Frame of the same length as ``data_2d`` whose row
            values should appear as per-point annotations.
    """
    # `unique` is a method and has to be called: zipping the bound method
    # itself raises TypeError. `zip` stops at the shorter input, so only as
    # many labels as there are G10 colours receive an explicit colour.
    c = dict(zip(labels.unique(), px.colors.qualitative.G10))  # make the same colors for another data
    fig = px.scatter(
        data_2d, x=0, y=1,
        color=labels,
        color_discrete_map=c,
        text=another_df,  # here I'm stuck
        # expected annotation
        # column1: 57575874
        # column2: 0.4545
        # columnN: ....
        # hover_data awaits for labels from data_2d and it doesn't work
        # text is constant, I see just a mess of text
    )
    fig.update_traces(marker=dict(size=5, opacity=0.7))
    fig.show()
import plotly.express as px
import pandas as pd
import numpy as np
import umap.umap_ as umap
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
def scatter(data_2d, labels, another_df, hover_name="island"):
    """Show a scatter of a 2-D embedding with hover text from another frame.

    ``another_df`` is handed to plotly express as the data frame, so every
    one of its columns can be listed in the hover box, while the point
    coordinates are supplied separately from ``data_2d``.

    Args:
        data_2d: 2-D array indexed as ``data_2d[:, 0]`` (x) and
            ``data_2d[:, 1]`` (y), one row per point.
        labels: Per-point labels exposing ``.unique()`` (e.g. a pandas
            Series); used for the point colours.
        another_df: DataFrame with one row per point; all of its columns
            are shown in the hover box.
        hover_name: Column of ``another_df`` passed to ``px.scatter`` as
            ``hover_name``. Defaults to ``"island"`` for backward
            compatibility; pass another column name for a frame without
            an ``island`` column.
    """
    # Fixed label -> colour mapping so the same colours can be reused for
    # another data set. `zip` stops at the shorter input, so only as many
    # labels as there are G10 colours receive an explicit colour.
    colour_by_label = dict(zip(labels.unique(), px.colors.qualitative.G10))
    fig = px.scatter(
        another_df,
        x=data_2d[:, 0],
        y=data_2d[:, 1],
        color=labels,
        color_discrete_map=colour_by_label,
        hover_name=hover_name,
        hover_data=another_df.columns,
    )
    fig.update_traces(marker=dict(size=5, opacity=0.7))
    fig.show()
# Load the Palmer penguins table (the URL includes a commit hash).
penguins = pd.read_csv(
    "https://github.com/allisonhorst/palmerpenguins/raw/5b5891f01b52ae26ad8cb9755ec93672f49328a8/data/penguins_size.csv"
)

# Keep the four measurement columns and drop rows with missing values.
feature_columns = [
    "culmen_length_mm",
    "culmen_depth_mm",
    "flipper_length_mm",
    "body_mass_g",
]
data = penguins.loc[:, feature_columns].dropna()

# Standardise the features, then embed them with UMAP.
scaled_penguin_data = StandardScaler().fit_transform(data.values)
reducer = umap.UMAP()
embedding = reducer.fit_transform(scaled_penguin_data)

# Restrict the full table to the rows that survived `dropna`, so the hover
# frame and the colour labels line up with the embedding row for row.
kept_rows = penguins.loc[data.index]
sex_labels = penguins.loc[data.index, "sex"].fillna("UNKNOWN")
scatter(embedding, sex_labels, kept_rows)