Search code examples
python, matplotlib, scikit-learn, k-means, graph-visualization

Visualisation of clusters using outer coordinates of the individual cluster


I want to visualise my clusters.

By using this code:

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans

# Sample 2-D observations, one [x, y] pair per row.
X = np.array([
    [28, 7], [36, 5], [32, 2], [56, 8], [47, 5], [50, 100], [100, 100],
    [26, 59], [19, 71], [75, 9], [34, 4], [56, 9], [28, 1], [33, 6],
])
# Colours indexed by cluster label.
col = ['blue', 'green', 'c', 'm', 'y', 'k', "violet", "indigo"]
ncluster = 2

# Fit k-means and keep the per-point labels and the cluster centres.
kmeans = KMeans(n_clusters=ncluster, max_iter=500)
kmeans.fit(X)
y = kmeans.labels_
centroids = kmeans.cluster_centers_

# For every cluster record its centre as an (x, y) tuple and the distance
# from that centre to the farthest member, i.e. the radius of the smallest
# centroid-centred circle containing the whole cluster.
clusters_centroids = {}
clusters_radii = {}
for cluster in range(ncluster):
    center = (centroids[cluster, 0], centroids[cluster, 1])
    members = X[y == cluster]
    distances = [np.linalg.norm(np.subtract(point, center))
                 for point in members]
    clusters_centroids[cluster] = center
    clusters_radii[cluster] = max(distances)

# Single axes on which everything below is drawn.
fig, ax = plt.subplots(nrows=1, figsize=(7, 5))

def drawclusters():
    """Draw every cluster, its enclosing circle, and the centroids on ``ax``.

    Reads the module-level ``X``, ``y``, ``col``, ``ncluster``, ``centroids``,
    ``clusters_centroids``, ``clusters_radii`` and ``ax``.

    All artists are added through ``ax`` directly. Drawing the points via
    ``plt.scatter`` would target whichever axes pyplot currently considers
    active, which is not guaranteed to be the axes the circles are added to.
    """
    for i in range(ncluster):
        members = X[y == i]
        ax.scatter(members[:, 0], members[:, 1], s=100,
                   c=col[i], label=f'Cluster {i + 1}')
        # Unfilled circle centred on the centroid, reaching the farthest member.
        outline = mpatches.Circle(
            clusters_centroids[i], clusters_radii[i],
            edgecolor=col[i], fill=False)
        ax.add_patch(outline)
    ax.scatter(centroids[:, 0], centroids[:, 1], s=200,
               c='red', label='Centroids', marker='x')


# Draw the clusters, their circles and the centroids on the axes created above.
drawclusters()
# The legend entries come from the ``label=`` arguments of the scatter calls.
plt.legend()
# Adjust the padding around the axes before the figure is displayed.
plt.tight_layout()
plt.show()

I am getting circles:

circular visualisation

But I want to visualise the clusters using their outer points, something similar to the image below. Ignore the data in that image; I only need the visualisation part (I need shapes):

non-circular

I need the code in Python. There is a function, fviz_cluster, in R.


Solution

  • You can create the convex hull of each of the clusters using scipy.spatial.ConvexHull(). Note that X[y == i] needs to be stored in a new array, because ConvexHull() returns indices into that shorter array rather than into X. The returned vertices form a polygon. To draw its outline, the first vertex needs to be appended at the end so that the plot includes the line segment that closes the polygon.

    import matplotlib.pyplot as plt
    import numpy as np
    from sklearn.cluster import KMeans
    from scipy.spatial import ConvexHull
    
    def _closed_hull_indices(points):
        """Return indices into *points* tracing its convex hull as a closed loop.

        Returns ``None`` when no two-dimensional hull can be built: fewer than
        three points, or Qhull rejects the input (for example, when all points
        are collinear).
        """
        # Local import: only ConvexHull is imported at file level.
        from scipy.spatial import QhullError

        if len(points) < 3:
            return None
        try:
            hull = ConvexHull(points)
        except QhullError:
            return None
        # Repeat the first vertex at the end so the outline is drawn closed.
        return np.append(hull.vertices, hull.vertices[0])

    def drawclusters(ax):
        """Draw every cluster, its convex-hull outline, and the centroids on *ax*.

        Reads the module-level ``X``, ``y``, ``col``, ``ncluster`` and
        ``centroids``. A cluster for which no convex hull exists (see
        ``_closed_hull_indices``) is drawn as points only instead of raising.
        """
        for i in range(ncluster):
            # Copy the cluster's rows: the hull indices refer to this array,
            # not to X.
            points = X[y == i]
            ax.scatter(points[:, 0], points[:, 1], s=100, c=col[i], label=f'Cluster {i + 1}')
            vert = _closed_hull_indices(points)
            if vert is None:
                continue
            ax.plot(points[vert, 0], points[vert, 1], '--', color=col[i])
            ax.fill(points[vert, 0], points[vert, 1], color=col[i], alpha=0.2)
        ax.scatter(centroids[:, 0], centroids[:, 1], s=200, c='red', label='Centroids', marker='x')
    
    # Sample 2-D observations, one [x, y] pair per row.
    X = np.array([
        [28, 7], [36, 5], [32, 2], [56, 8], [47, 5], [50, 100], [100, 100],
        [26, 59], [19, 71], [75, 9], [34, 4], [56, 9], [28, 1], [33, 6],
    ])
    # One colour per cluster, indexed by cluster label.
    col = ['blue', 'green']
    ncluster = 2

    # Fit k-means and keep the per-point labels and the cluster centres.
    kmeans = KMeans(n_clusters=ncluster, max_iter=500)
    kmeans.fit(X)
    y = kmeans.labels_
    centroids = kmeans.cluster_centers_

    # Draw everything on a single axes, then show the figure.
    fig, ax = plt.subplots(nrows=1, figsize=(7, 5))
    drawclusters(ax)
    ax.legend()
    plt.tight_layout()
    plt.show()
    

    resulting plot