Search code examples
pythoncluster-analysis

Python indentationError data clustering


I am trying to identify some clusters in data with Python, based on an example. I am a beginner at programming. The code raises an "expected an indented block" error at the line

centroids = np.zeros((K,X.shape[1])) (4th line from the bottom)

Here is my code:

import os
print(os.listdir("Q:/Research/Python/Clusters/"))

# Any results you write to the current directory are saved as output.

ex7data2 = pd.read_csv("Q:/Research/Python/Clusters/Sadales_day_change.csv",header=None)
ex7data2.head()

ex7data2.plot.scatter(x=0,y=1)

# Calculate the nearest center of each sample
def findClosestCentroids(X, centroids):
distance = np.zeros((len(X),len(centroids)))
for i in range(len(X)):
    for j in range(len(centroids)):
        distance[i,j] = np.linalg.norm(X[i,:]-centroids[j,:])

return np.argmin(distance,axis=1)

# Update cluster center
def computeCentroids(X, idx, K):
centroids = np.zeros((K,X.shape[1]))
for i in range(K):
    centroids[i,:] = np.mean(X[idx == i],axis = 0)

return centroids

Any ideas?


Solution

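  • In your code, the indentation is incorrect in the findClosestCentroids and computeCentroids functions: every statement in the body of a def must be indented one level deeper than the def line itself. Please refer to the following corrected code snippet.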

    import os

    # numpy and pandas are used below as `np` and `pd`; without these imports
    # the snippet stops with a NameError before any clustering code runs.
    import numpy as np
    import pandas as pd

    # List the data directory to confirm the input file is where we expect it.
    print(os.listdir("Q:/Research/Python/Clusters/"))

    # Any results you write to the current directory are saved as output.

    # The CSV has no header row, so columns are labelled 0, 1, ...
    ex7data2 = pd.read_csv("Q:/Research/Python/Clusters/Sadales_day_change.csv",header=None)
    ex7data2.head()

    # Scatter plot of the first column against the second.
    ex7data2.plot.scatter(x=0,y=1)
    
    # Calculate the nearest center of each sample
    def findClosestCentroids(X, centroids):
        """Return, for every sample, the index of its nearest centroid.

        Args:
            X: Array-like of samples, one row per sample (a NumPy array,
                a pandas DataFrame or nested lists are all accepted).
            centroids: Array-like of cluster centers, one row per centroid,
                with the same number of columns as ``X``.

        Returns:
            1-D integer array with one entry per sample holding the row index
            of the closest centroid by Euclidean distance. When two centroids
            are equally close, the lower index wins.
        """
        # Coerce up front so DataFrames and plain lists work; the previous
        # `X[i, :]` indexing only worked on NumPy arrays.
        X = np.asarray(X, dtype=float)
        centroids = np.asarray(centroids, dtype=float)

        distance = np.zeros((len(X), len(centroids)))
        # With no samples there is nothing to measure; the empty matrix
        # already has the right shape for the argmin below.
        if len(X):
            for j, centroid in enumerate(centroids):
                # One vectorised norm per centroid instead of one per
                # (sample, centroid) pair.
                distance[:, j] = np.linalg.norm(X - centroid, axis=1)

        return np.argmin(distance, axis=1)
    
    # Update cluster center
    def computeCentroids(X, idx, K):
        """Recompute each cluster center as the mean of its assigned samples.

        Args:
            X: Array-like of samples, one row per sample.
            idx: Array-like with one cluster index per sample (for example
                the result of ``findClosestCentroids``).
            K: Number of clusters.

        Returns:
            Array of shape ``(K, number of columns in X)``. Row ``i`` is the
            column-wise mean of the samples whose index is ``i``. A cluster
            with no samples keeps an all-zero row instead of NaN.
        """
        X = np.asarray(X)
        # A plain list would make `idx == i` a single bool rather than a
        # per-sample mask, so convert before comparing.
        idx = np.asarray(idx)

        centroids = np.zeros((K, X.shape[1]))
        for i in range(K):
            members = X[idx == i]
            # The mean of an empty selection is NaN (with a RuntimeWarning),
            # and a NaN centroid corrupts every later distance computation,
            # so empty clusters are left at their initial value.
            if len(members):
                centroids[i, :] = members.mean(axis=0)

        return centroids