Search code examples
python-3.x numpy jupyter-notebook k-means

How to fix Invalid index to scalar variable?


I tried to write a K-means algorithm and got this error on the line: points = np.array ([x[j] for j in range (len (x)) if cluster [j] == i]). Can anyone help?

from copy import deepcopy

def euclidean_distance(a, b, ax=1):
    """Return the norm of ``a - b`` taken along axis ``ax``.

    The difference is computed with ordinary NumPy broadcasting and then
    handed to ``np.linalg.norm``; ``ax`` is forwarded unchanged as its
    ``axis`` argument (default 1).
    """
    difference = a - b
    return np.linalg.norm(difference, axis=ax)

# Centroid positions from the previous pass, initialised to zeros.
c_prev = np.zeros(c.shape)
# One slot per sample in x, filled with the index of its nearest centroid.
clusters = np.zeros(len(x))

# Displacement of each centroid between the previous and current pass.
distance_differences = euclidean_distance(c, c_prev)

# Keep iterating while at least one centroid still has a non-zero displacement.
while distance_differences.any():
    # Assignment step: label every sample with its closest centroid.
    for sample_idx in range(len(x)):
        distances = euclidean_distance(x[sample_idx], c)
        cluster = np.argmin(distances)
        clusters[sample_idx] = cluster

    # Snapshot the centroids before they are moved.
    c_prev = deepcopy(c)

    # Update step: move each centroid to the mean of the samples assigned to
    # it; a centroid with no samples is left where it is.
    for centroid_idx in range(k):
        members = [
            x[j] for j in range(len(x)) if clusters[j] == centroid_idx
        ]
        if members:
            c[centroid_idx] = np.mean(members, axis=0)

    distance_differences = euclidean_distance(c, c_prev)

# One matplotlib colour code per cluster; the palette is reused cyclically
# when k is larger than the number of colours listed here.
colors = ['b', 'r', 'y', 'g', 'c', 'm']
for i in range(k):
    # Index the per-sample `clusters` array here. `cluster` (singular) is the
    # scalar left over from the assignment loop, and subscripting it raises
    # "invalid index to scalar variable".
    points = np.array([x[j] for j in range(len(x)) if clusters[j] == i])
    if len(points) > 0:
        plt.scatter(points[:, 0], points[:, 1], s=10,
                    c=colors[i % len(colors)])
    else:
        # An empty cluster means no sample was closest to centroid i.
        print('Please regenerate your centeroids again')

# Every cluster is drawn inside the loop, so only the centroids remain to be
# plotted (black stars) before showing the figure.
plt.scatter(c[:, 0], c[:, 1], marker='*', s=100, c='k')
plt.show()

Solution

  • Yes, you have a typo in the line:

    points = np.array ([x[j] for j in range (len (x)) if cluster [j] == i])
    

    It should read,

    points = np.array ([x[j] for j in range (len (x)) if clusters[j] == i])
    

    This is because cluster is just a scalar (i.e. it holds only a single value), whereas clusters is the array.