Search code examples
python list matplotlib cluster-analysis dynamic-list

How to make a dynamic clustering


I have a nested list that represents dynamic image data, with each inner list containing a number and the x-center coordinate of a bounding box (stored as `[x_center, number]`). I need to cluster this data and get the approximate x coordinate of each bounding box as an integer value, sorted by x. The expected output is [[100, 1], [120, 1], [151, 3], [180, 0]].

I've visualized this data using Matplotlib to gain insights into its distribution across the space. Now, I'm looking to perform clustering on this data, approximate the coordinates, and sort it based on the x-center values. To help you better understand, I'll provide the code and a plot as well. [enter image description here]

import matplotlib.pyplot as plt
# Detections per frame; each entry is an [x_center, number] pair.
data = [
    [[100, 1], [120, 1], [150, 3]],
    [[101, 1], [119, 1], [151, 3]],
    [[102, 1], [123, 1], [150, 3], [180, 0]],
    [[103, 1], [154, 3], [180, 0]],
    [[103, 1], [152, 3], [181, 0]],
    [[101, 1], [120, 1], [180, 0]],
    [[101, 1], [120, 1], [150, 3]],
    [[101, 1], [119, 1], [150, 3]],
    [[102, 1], [123, 1], [150, 3], [181, 0]],
    [[103, 1], [153, 3], [181, 0]],
    [[103, 1], [152, 3], [181, 0]],
    [[101, 1], [120, 1], [180, 0]],
    [[100, 1], [122, 1], [150, 3], [181, 0]],
    # Add more data here
]

# Flatten the frames into one list of detections, then split it into the
# two coordinate columns used for plotting.
detections = [detection for frame in data for detection in frame]
x_values = [detection[0] for detection in detections]
y_values = [detection[1] for detection in detections]

print("X Values:", x_values)
print("Y Values:", y_values)


# Draw on the current axes, which is the same target the pyplot helper
# functions act on implicitly.
ax = plt.gca()
ax.scatter(x_values, y_values)
ax.set_xlabel('X Values')
ax.set_ylabel('Y Values')
ax.set_title('Scatter Plot of X and Y Values')
ax.grid(True)
plt.show()

Solution

  • You can use the DBSCAN clustering algorithm to achieve this. Here is the code and result:

    import matplotlib.pyplot as plt
    import numpy as np
    from sklearn.cluster import DBSCAN
    
    # Detections per frame, each stored as an [x, y] pair.
    data = [
        [[100, 1], [120, 1], [150, 3]],
        [[101, 1], [119, 1], [151, 3]],
        [[102, 1], [123, 1], [150, 3], [180, 0]],
        [[103, 1], [154, 3], [180, 0]],
        [[103, 1], [152, 3], [181, 0]],
        [[101, 1], [120, 1], [180, 0]],
        [[101, 1], [120, 1], [150, 3]],
        [[101, 1], [119, 1], [150, 3]],
        [[102, 1], [123, 1], [150, 3], [181, 0]],
        [[103, 1], [153, 3], [181, 0]],
        [[103, 1], [152, 3], [181, 0]],
        [[101, 1], [120, 1], [180, 0]],
        [[100, 1], [122, 1], [150, 3], [181, 0]],
        # Add more data here
    ]

    # Collect the first and second element of every pair, frame by frame.
    x_values = [pair[0] for frame in data for pair in frame]
    y_values = [pair[1] for frame in data for pair in frame]
    
    # One row per detection: column 0 is x, column 1 is y.
    X = np.array(list(zip(x_values, y_values)))
    clustering = DBSCAN(eps=10, min_samples=2).fit(X)
    labels = clustering.labels_

    centroids = []
    for label in np.unique(labels):
        # DBSCAN gives outliers the label -1. They do not form a cluster, so
        # averaging them would add a meaningless extra "centroid".
        if label == -1:
            continue
        cluster_values = X[labels == label]
        # Could also use median here
        centroid = np.mean(cluster_values, axis=0)
        print(centroid)
        centroids.append(centroid)

    # reshape keeps the array two-dimensional even when no cluster was found,
    # so the column indexing used by the plotting code still works.
    centroids = np.array(centroids).reshape(-1, 2)
    # Label order is not guaranteed to follow x position; sort by x explicitly.
    centroids = centroids[np.argsort(centroids[:, 0])]

    # Integer approximation of every cluster as [[x, y], ...], sorted by x,
    # which is the output format the question asks for.
    approx_centroids = np.rint(centroids).astype(int).tolist()
    print(approx_centroids)
    
    # Raw points, with a large hollow red ring drawn at every centroid.
    ax = plt.gca()
    ax.scatter(X[:, 0], X[:, 1])
    ax.scatter(centroids[:, 0], centroids[:, 1], s=1000, facecolors="none", edgecolors="r")
    plt.show()
    

    enter image description here