Search code examples
python machine-learning data-science cluster-analysis networkx

How to make a graph using NetworkX after spectral clustering on the moons dataset


I have generated a moons dataset with 20 points and run spectral clustering on it. I want to build a graph with NetworkX using the 3 nearest neighbours of each point, where the data points are the nodes and the entries of the affinity matrix produced by the clustering are the weights on the edges between nodes. I also need help changing the colour and shape of the nodes so that the nodes of one cluster can be told apart from those of the other. My code is given below, followed by its output image. I just want to draw a graph between the nodes of my output image using nearest neighbours = 3.

import numpy as np
import os
from sklearn import metrics
from sklearn.cluster import SpectralClustering
from sklearn.neighbors import DistanceMetric
from sklearn.cluster import KMeans
import pandas as pd
import pylab as pl
import sklearn.metrics as sm
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
import networkx as nx
# Build the two-moons toy dataset: X holds the 2-D points, y the generated labels.
X, y = make_moons(n_samples=20)
print(X)
print(y)

# First pass: draw every point as a hollow red circle; the loop further down
# draws the per-point markers on top of these.
plt.scatter(X[:, 0], X[:, 1], marker='o', facecolors='none', edgecolor='r')

# Spectral clustering into two groups using an RBF affinity.
clustering = SpectralClustering(
    n_clusters=2,
    assign_labels='kmeans',
    affinity='rbf',
    gamma=50,
    degree=3,
    random_state=0,
)
y_predict = clustering.fit_predict(X)

# Bare expressions: no effect in a script, they only echo a value when the
# code is run interactively.
y_predict
clustering.labels_
clustering.affinity_matrix_

# Colour encodes the predicted cluster (0 -> blue, 1 -> red); the marker
# encodes the generated label (0 -> '+', 1 -> hollow circle).
for idx in range(len(y_predict)):
    px, py = X[idx, 0], X[idx, 1]
    label, cluster = y[idx], y_predict[idx]
    if cluster == 0:
        if label == 0:
            c1 = pl.scatter(px, py, c='b', marker='+')
        elif label == 1:
            c2 = pl.scatter(px, py, facecolors='none', edgecolor='b', marker='o')
    elif cluster == 1:
        if label == 0:
            c3 = pl.scatter(px, py, c='r', marker='+')
        elif label == 1:
            c4 = pl.scatter(px, py, facecolors='none', edgecolor='r', marker='o')
pl.show()

An image of the clustering of the moons dataset is given below.


Solution

  • Building on the answer to your previous question, I believe this is what you asked for.

    updated_moon_fig

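    Since the values in the affinity matrix are all between 0 and 1 but of very different relative magnitudes, I used -10 / log(weight) as the edge width. For a weight strictly between 0 and 1 the logarithm is negative, so this width is positive and grows as the affinity grows, while compressing the huge range of raw values into something drawable.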

    import numpy as np
    import os
    from sklearn import metrics
    from sklearn.cluster import SpectralClustering
    from sklearn.neighbors import DistanceMetric
    from sklearn.cluster import KMeans
    import pandas as pd
    import pylab as pl
    import sklearn.metrics as sm
    from sklearn.metrics import confusion_matrix,classification_report
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.datasets import make_moons
    import matplotlib.pyplot as plt
    import networkx as nx
    import math
    # Number of nearest neighbours (by affinity) each point is linked to.
    k = 3

    # Build the two-moons toy dataset: X holds the 2-D points, y the generated labels.
    X, y = make_moons(n_samples=20)
    print(X)
    print(y)

    pl.figure(figsize=(15, 12))

    # Spectral clustering into two groups using an RBF affinity.
    clustering = SpectralClustering(n_clusters=2, assign_labels='kmeans',
                                    affinity='rbf', gamma=50, degree=3,
                                    random_state=0)
    y_predict = clustering.fit_predict(X)

    # Draw the nodes: colour encodes the predicted cluster (0 -> blue, 1 -> red),
    # the marker encodes the generated label (0 -> '+', 1 -> hollow circle).
    cluster_colour = {0: 'b', 1: 'r'}
    for point, label, cluster in zip(X, y, y_predict):
        colour = cluster_colour[cluster]
        if label == 0:
            pl.scatter(point[0], point[1], c=colour, marker='+')
        else:
            pl.scatter(point[0], point[1], facecolors='none', edgecolor=colour,
                       marker='o')

    # Work on a copy so the fitted estimator's affinity matrix is not modified,
    # and zero the diagonal so a node is never picked as its own neighbour.
    affinity = clustering.affinity_matrix_.copy()
    np.fill_diagonal(affinity, 0)

    # Nodes are the point coordinates themselves, so they double as positions.
    G = nx.Graph()
    for i, row in enumerate(affinity):
        # Indices of the k largest affinities in this row (order not guaranteed).
        for j in np.argpartition(row, -k)[-k:]:
            G.add_edge(tuple(X[i]), tuple(X[j]), weight=row[j])

    # Edge width: -10 / log(weight). This assumes every selected weight lies
    # strictly between 0 and 1 (log(1) == 0 would divide by zero, and log(0)
    # is undefined).
    weights = [-10 / math.log(weight) for _, _, weight in G.edges.data('weight')]

    # Draw only the edges; the nodes were already drawn by the scatter calls.
    pos = {node: node for node in G.nodes}
    nx.draw_networkx_edges(G, pos, width=weights)
    pl.show()