Search code examples
python · scikit-learn · visualization · pca

How to visualize high-dimensional vectors as points in a 2D plane?


For example, there are three vectors as below.

[ 0.0377,  0.1808,  0.0807, -0.0703,  0.2427, -0.1957, -0.0712, -0.2137,
     -0.0754, -0.1200,  0.1919,  0.0373,  0.0536,  0.0887, -0.1916, -0.1268,
     -0.1910, -0.1411, -0.1282,  0.0274, -0.0781,  0.0138, -0.0654,  0.0491,
      0.0398,  0.1696,  0.0365,  0.2266,  0.1241,  0.0176,  0.0881,  0.2993,
     -0.1425, -0.2535,  0.1801, -0.1188,  0.1251,  0.1840,  0.1112,  0.3172,
      0.0844, -0.1142,  0.0662,  0.0910,  0.0416,  0.2104,  0.0781, -0.0348,
     -0.1488,  0.0129],
 [-0.1302,  0.1581, -0.0897,  0.1024, -0.1133,  0.1076,  0.1595, -0.1047,
      0.0760,  0.1092,  0.0062, -0.1567, -0.1448, -0.0548, -0.1275, -0.0689,
     -0.1293,  0.1024,  0.1615,  0.0869,  0.2906, -0.2056,  0.0442, -0.0595,
     -0.1448,  0.0167, -0.1259, -0.0989,  0.0651, -0.0424,  0.0795, -0.1546,
      0.1330, -0.2284,  0.1672,  0.1847,  0.0841,  0.1771, -0.0101, -0.0681,
      0.1497,  0.1226,  0.1146, -0.2090,  0.3275,  0.0981, -0.3295,  0.0590,
      0.1130, -0.0650],
 [-0.1745, -0.1940, -0.1529, -0.0964,  0.2657, -0.0979,  0.1510, -0.1248,
     -0.1541,  0.1782, -0.1769, -0.2335,  0.2011,  0.1906, -0.1918,  0.1896,
     -0.2183, -0.1543,  0.1816,  0.1684, -0.1318,  0.2285,  0.1784,  0.2260,
     -0.2331,  0.0523,  0.1882,  0.1764, -0.1686,  0.2292]

How can I plot them as three points in the same 2D plane, as in the picture below? Thanks!

In this picture, the three points represent the three vectors for the three words.


Solution

  • I used PCA from sklearn; maybe this code will help you:

    import matplotlib.pyplot as plt
    import numpy as np
    from sklearn.decomposition import PCA
    
    # Word vector for "U.S.A" (50 values).
    usa = [
        0.0377, 0.1808, 0.0807, -0.0703, 0.2427, -0.1957, -0.0712, -0.2137,
        -0.0754, -0.1200, 0.1919, 0.0373, 0.0536, 0.0887, -0.1916, -0.1268,
        -0.1910, -0.1411, -0.1282, 0.0274, -0.0781, 0.0138, -0.0654, 0.0491,
        0.0398, 0.1696, 0.0365, 0.2266, 0.1241, 0.0176, 0.0881, 0.2993,
        -0.1425, -0.2535, 0.1801, -0.1188, 0.1251, 0.1840, 0.1112, 0.3172,
        0.0844, -0.1142, 0.0662, 0.0910, 0.0416, 0.2104, 0.0781, -0.0348,
        -0.1488, 0.0129,
    ]

    # Word vector for "Obama" (50 values).
    obama = [
        -0.1302, 0.1581, -0.0897, 0.1024, -0.1133, 0.1076, 0.1595, -0.1047,
        0.0760, 0.1092, 0.0062, -0.1567, -0.1448, -0.0548, -0.1275, -0.0689,
        -0.1293, 0.1024, 0.1615, 0.0869, 0.2906, -0.2056, 0.0442, -0.0595,
        -0.1448, 0.0167, -0.1259, -0.0989, 0.0651, -0.0424, 0.0795, -0.1546,
        0.1330, -0.2284, 0.1672, 0.1847, 0.0841, 0.1771, -0.0101, -0.0681,
        0.1497, 0.1226, 0.1146, -0.2090, 0.3275, 0.0981, -0.3295, 0.0590,
        0.1130, -0.0650,
    ]

    # Word vector for "Nationality".
    # NOTE: only 30 values are listed here, versus 50 for the two vectors
    # above.
    nationality = [
        -0.1745, -0.1940, -0.1529, -0.0964, 0.2657, -0.0979, 0.1510, -0.1248,
        -0.1541, 0.1782, -0.1769, -0.2335, 0.2011, 0.1906, -0.1918, 0.1896,
        -0.2183, -0.1543, 0.1816, 0.1684, -0.1318, 0.2285, 0.1784, 0.2260,
        -0.2331, 0.0523, 0.1882, 0.1764, -0.1686, 0.2292,
    ]
    
    
    # Project all word vectors into ONE shared 2D space and plot them.
    #
    # PCA treats each row of its input as a sample and each column as a
    # feature, so every word vector must be a row of the same matrix and a
    # single PCA must be fitted on that matrix. Fitting a separate PCA per
    # vector would give each word its own unrelated axes, making the
    # resulting points incomparable.
    vectors = [usa, obama, nationality]
    annotations = ["U.S.A", "Obama", "Nationality"]
    colors = ['r', 'g', 'b']

    # All samples must have the same number of features. The `nationality`
    # vector has fewer values than the other two, so every vector is
    # truncated to the shortest common length to make stacking possible.
    # With complete, equal-length vectors this is a no-op.
    n_features = min(len(vector) for vector in vectors)
    data = np.array([vector[:n_features] for vector in vectors])

    # Two components -> one (x, y) coordinate pair per word vector.
    pca = PCA(n_components=2)
    points = pca.fit_transform(data)  # shape: (len(vectors), 2)

    x_coordinates = points[:, 0]
    y_coordinates = points[:, 1]

    plt.figure(figsize=(8, 6))
    plt.scatter(x_coordinates, y_coordinates, marker=",", color=colors, s=300)

    # Label each point with the word it represents.
    for label, x, y in zip(annotations, x_coordinates, y_coordinates):
        plt.annotate(label, (x, y))

    plt.show()
    

    output:

    enter image description here