Search code examples
python pandas cluster-analysis plotly k-means

Customize labels with Plotly


I am trying to customize the data labels that appear when hovering over a point: (enter image description here) Here is the code that produces the output above:

import numpy as np
import pandas as pd
# NOTE: `plotly.plotly` exists only in plotly < 4; newer releases ship it as
# `chart_studio.plotly`. It is kept because the original snippet imported it.
import plotly.plotly as py
import plotly.graph_objs as go
# The original snippet called a bare `iplot` without importing it; the offline
# notebook variant is assumed here -- confirm against your environment.
from plotly.offline import iplot
from sklearn.cluster import KMeans

# Create random data: one raw column and one standardized column per label.
labels = ['A', 'B', 'C']
N = 20
df = pd.DataFrame(index=range(N))
standardized_cols = []

for col in labels:
    df[col] = np.random.randn(N)
    standardized_colname = col + "_standardized"
    standardized_cols.append(standardized_colname)
    # z-score of the raw column
    df[standardized_colname] = (df[col] - df[col].mean()) / df[col].std()

# Cluster on the standardized columns only; `c` holds one cluster id per row.
c = KMeans(n_clusters=3, random_state=1).fit(df[standardized_cols]).labels_

# Plot the standardized coordinates, colored by cluster.
trace = go.Scatter3d(
    x=df.A_standardized,
    y=df.B_standardized,
    z=df.C_standardized,

    mode='markers',
    marker=dict(
        size=5,
        color=c,
        colorscale='Viridis',
    ),
    name='test',
    # Hover text is just the cluster id of each point.
    text=c,
)

data = [trace]
# The original referenced `layout` without defining it; an empty layout keeps
# plotly's defaults.
layout = go.Layout()

fig = go.Figure(data=data, layout=layout)
iplot(fig)

My data: (enter image description here)

The chart shows the clustering of the standardized columns. But when hovering over a data point, I'd like to see the non-standardized data in the label, i.e. something like

A: 0,999
B: 0,565
C: 0,765
Cluster: 2

I experimented but could not figure out how to achieve this. Is this possible?


Solution

  • You can use a list comprehension to build the hover text from whatever columns you want and pass it as `text`; see the sample below (note: I am plotting offline):

    # data
    import numpy as np
    import pandas as pd
    from sklearn.cluster import KMeans

    # Fixed seed so the random sample is the same on every run.
    np.random.seed(1)
    labels = ['A', 'B', 'C']
    N = 20
    df = pd.DataFrame(index=range(N))
    standardized_cols = []

    for col in labels:
        # Raw values stay in `col`; the z-scored copy goes in "<col>_standardized".
        df[col] = np.random.randn(N)
        standardized_colname = col + "_standardized"
        standardized_cols.append(standardized_colname)
        df[standardized_colname] = (df[col] - df[col].mean()) / df[col].std()

    # Cluster on the standardized columns only; `c` holds one cluster id per row.
    c = KMeans(n_clusters=3, random_state=1).fit(df[standardized_cols]).labels_
    

    plot:

    import plotly as py
    import plotly.graph_objs as go
    
    
    # Build one hover label per point from the raw (non-standardized) columns
    # plus the cluster id. The loop variables are deliberately not named `c`,
    # because `c` is the array of cluster ids and must stay visible here.
    # Values are rounded to three decimals; change the format spec if needed.
    hover_text = [
        f"A: {a:.3f}<br>B: {b:.3f}<br>C: {c_raw:.3f}<br>Cluster: {cluster}"
        for a, b, c_raw, cluster in zip(df['A'], df['B'], df['C'], c)
    ]

    trace = go.Scatter3d(
        # Positions use the standardized columns the clustering was fitted on.
        x=df.A_standardized,
        y=df.B_standardized,
        z=df.C_standardized,

        mode='markers',
        marker=dict(
            size=5,
            color=c,
            colorscale='Viridis',
        ),
        name='test',

        # text shown on hover ("<br>" starts a new line in the label)
        text=hover_text,
        # if you do not want to display x,y,z
        # hoverinfo='text'
    )

    layout = dict(title='TEST')

    data = [trace]
    fig = dict(data=data, layout=layout)

    # Offline plotting: writes a standalone HTML file.
    py.offline.plot(fig, filename='stackTest.html')
    

    enter image description here

    You can modify the list comprehension to display whatever you want.

    If you do not want to display x, y, z in the hover label, add hoverinfo='text' to the trace.