Search code examples
pythonpython-3.xmatplotlibmachine-learningdimensionality-reduction

Using TSNE to dimensionality reduction. Why 3 D graph is not working?


I have used the Digits dataset from Sklearn and I have tried to reduce the dimension from 64 to 3 using TSNE( t-Distributed Stochastic Neighbor Embedding):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#%matplotib inline
from sklearn.manifold import TSNE
from sklearn.datasets import load_digits
from mpl_toolkits.mplot3d import Axes3D


# Load the scikit-learn digits dataset and wrap its feature matrix in a DataFrame.
digits = load_digits()
digits_df = pd.DataFrame(digits.data,)
# Append the class labels as an extra "target" column.
digits_df["target"] = pd.Series(digits.target)

# Embed the first 64 columns (the features, without "target") into 3 components.
tsne = TSNE(n_components=3)
digits_tsne = tsne.fit_transform(digits_df.iloc[:,:64])
digits_df_tsne = pd.DataFrame(digits_tsne,
                            columns =["Component1","Component2","Component3"])

# Place the three t-SNE components and the labels side by side.
finalDf = pd.concat([digits_df_tsne, digits_df["target"]], axis = 1)

#Visualizing 3D
figure = plt.figure(figsize=(9,9))
axes = figure.add_subplot(111,projection = "3d")
# NOTE: finalDf is a pandas DataFrame, so the NumPy-style key [:, 0] used below
# is what raises the TypeError quoted after this snippet.
dots = axes.scatter(xs = finalDf[:,0],ys = finalDf[:,1],zs = finalDf[:,2],
                   c = digits.target, cmap = plt.cm.get_cmap("nipy_spectral_r",10))

The finalDf:

enter image description here

Te error:

TypeError: '(slice(None, None, None), 0)' is an invalid key

What is wrong? Could someone help me?


Solution

  • You're trying numpy slicing on pandas dataframe which is not valid, so first convert the dataframes to numpy arrays.

    Here's the updated code: -

    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    #%matplotib inline
    from sklearn.manifold import TSNE
    from sklearn.datasets import load_digits
    from mpl_toolkits.mplot3d import Axes3D
    
    
    # Load the digits dataset; keep the features and the labels in one DataFrame.
    digits = load_digits()
    digits_df = pd.DataFrame(digits.data)
    digits_df["target"] = pd.Series(digits.target)

    # Reduce the 64 feature columns (everything except "target") to 3 components.
    tsne = TSNE(n_components=3)
    digits_tsne = tsne.fit_transform(digits_df.iloc[:, :64])
    digits_df_tsne = pd.DataFrame(
        digits_tsne,
        columns=["Component1", "Component2", "Component3"],
    )

    finalDf = pd.concat([digits_df_tsne, digits_df["target"]], axis=1)

    # A DataFrame rejects NumPy-style keys such as finalDf[:, 0], so convert it
    # to a NumPy array once and slice that array instead of converting per axis.
    points = finalDf.to_numpy()

    # Visualizing 3D
    figure = plt.figure(figsize=(9, 9))
    axes = figure.add_subplot(111, projection="3d")
    dots = axes.scatter(
        xs=points[:, 0],
        ys=points[:, 1],
        zs=points[:, 2],
        c=digits.target,
        # plt.get_cmap is used because plt.cm.get_cmap was removed in newer
        # Matplotlib releases; 10 discrete colours, one per digit class.
        cmap=plt.get_cmap("nipy_spectral_r", 10),
    )
    

    enter image description here