Search code examples
Tags: python-3.x, machine-learning, pycharm, knn, supervised-learning

KNN Python implementation


This is what is shown when I try to run my code:

FutureWarning: Unlike other reduction functions (e.g. skew, kurtosis), the default behavior of mode typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of keepdims will become False, the axis over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set keepdims to True or False to avoid this warning. lab = mode(labels)

This is my Python code; I am having difficulty finding a suitable solution:

# Importing the required modules
import numpy as np
from scipy.stats import mode


# Euclidean Distance
def eucledian(p1, p2):
    """Return the Euclidean (L2) distance between ``p1`` and ``p2``.

    The element-wise difference is squared, summed over all elements and
    the square root of that sum is returned.
    """
    difference = p1 - p2
    squared_total = np.sum(np.square(difference))
    return np.sqrt(squared_total)


# Function to calculate KNN
def predict(x_train, y, x_input, k):
    """Classify each row of ``x_input`` by a k-nearest-neighbours majority vote.

    For every query point the Euclidean distance to each training sample is
    computed, the ``k`` closest samples are selected and the most frequent
    label among them is taken as the prediction. When several labels are
    equally frequent, the smallest one is chosen.

    Args:
        x_train: 2-D array-like of training samples, one sample per row.
        y: 1-D array-like of labels, aligned with the rows of ``x_train``.
        x_input: Iterable of query points, each with as many features as a
            training sample.
        k: Number of neighbours that take part in the vote (at least 1).

    Returns:
        A list holding one predicted label per query point.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``x_train`` has no rows.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    x_train = np.asarray(x_train)
    y = np.asarray(y)
    if x_train.shape[0] == 0:
        raise ValueError("x_train must contain at least one sample")

    op_labels = []

    # Loop through the datapoints to be classified
    for item in x_input:
        # Distance from this point to every training sample at once
        # (broadcasting replaces the explicit per-row Python loop).
        point_dist = np.sqrt(np.sum((x_train - item) ** 2, axis=1))

        # Indices of the k closest training samples; a stable sort keeps
        # the original order among equally distant samples.
        nearest = np.argsort(point_dist, kind="stable")[:k]

        # Majority voting. np.unique returns the labels sorted, and argmax
        # picks the first maximum, so ties resolve to the smallest label.
        # This avoids scipy.stats.mode, whose return shape depends on the
        # SciPy version (the `keepdims` FutureWarning).
        values, counts = np.unique(y[nearest], return_counts=True)
        op_labels.append(values[np.argmax(counts)])

    return op_labels

# Importing the required modules
# Importing required modules
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from numpy.random import randint

# Loading the Data
iris = load_iris()

# Store the feature matrix in X and the target vector in y
X = iris.data
y = iris.target

# Shuffle the sample indices once and slice the result, so the training and
# testing sets are disjoint. Drawing both sets independently with
# replacement would repeat samples and let test samples leak into training.
shuffled_idx = np.random.permutation(len(X))

# Creating the training Data (first 100 shuffled samples)
train_idx = shuffled_idx[:100]
X_train = X[train_idx]
y_train = y[train_idx]

# Creating the testing Data (the remaining samples)
test_idx = shuffled_idx[100:]
X_test = X[test_idx]
y_test = y[test_idx]

# Applying our function
y_pred = predict(X_train, y_train, X_test, 7)

# Checking the accuracy. A bare expression is only echoed in a REPL or
# notebook, so print it explicitly to see the result when run as a script.
print("Accuracy:", accuracy_score(y_test, y_pred))

I am expecting the prediction accuracy to be printed as the output.


Solution

  • KNN can be done like this.

    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.preprocessing import StandardScaler

    # Source of the Iris data set
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

    # Column names to assign to the dataset
    names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']

    # Read the dataset into a pandas DataFrame
    dataset = pd.read_csv(url, names=names)

    # Peek at the first rows (the result is only displayed in a notebook/REPL)
    dataset.head()

    # Every column except the last is a feature; the fifth column is the label
    X = dataset.iloc[:, :-1].values
    y = dataset.iloc[:, 4].values

    # Hold out 20% of the samples for testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

    # Fit the scaler on the training features only, then apply it to both sets
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Train a 5-nearest-neighbours classifier and predict the test labels
    classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski')
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # Report the results
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    
    # Result:
    #                  precision    recall  f1-score   support
    #
    #     Iris-setosa       1.00      1.00      1.00        13
    # Iris-versicolor       1.00      0.89      0.94         9
    #  Iris-virginica       0.89      1.00      0.94         8
    #
    #        accuracy                           0.97        30
    #       macro avg       0.96      0.96      0.96        30
    #    weighted avg       0.97      0.97      0.97        30
    
    
    # Mean test-set error for every K value from 1 through 39
    k_values = range(1, 40)
    error = [
        np.mean(
            KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train).predict(X_test)
            != y_test
        )
        for k in k_values
    ]

    # Plot the error rate against K
    plt.figure(figsize=(12, 6))
    plt.plot(
        k_values,
        error,
        color='red',
        linestyle='dashed',
        marker='o',
        markerfacecolor='blue',
        markersize=10,
    )
    plt.title('Error Rate K Value')
    plt.xlabel('K Value')
    plt.ylabel('Mean Error')
    

    [Image: plot of the mean error rate against the K value]