python-2.7, numpy, matplotlib, machine-learning, perceptron

Perceptron implementation, decision boundary will not plot


I am attempting to implement a perceptron. I have loaded a 100x2 array of values between 0 and 100, and each row has a label of either -1 or 1.

I believe the perceptron is working; however, I cannot plot the decision boundary as shown here: plot decision boundary matplotlib

When I run my code I only see a single-color background. I would expect to see two colors, one for each label in my data set (-1 and 1).

[Image: my current output, a single-color background; I expected two background colors, one per label (-1 and 1)]

[Image: an example of what I hope to see, from the sklearn documentation]
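Since the perceptron itself seems to be working, a quick way to verify that before debugging the plot (a minimal sketch, reusing the classify, X, l, and w defined in the code below) is to count the misclassified training points; on linearly separable data the perceptron convergence theorem guarantees this reaches 0 given enough epochs:

#sketch: run after training to confirm the perceptron has converged
preds = classify(X, l, w)   #predicted labels for the training points
print((preds != l).sum())   #expect 0 misclassifications after convergence

The code in question: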

import numpy as np
from matplotlib import pyplot as plt


def generate_data():
    #generate a dataset that is linearly separable
    group_1 = np.random.randint(50, 100, size=(50,2))
    group_1_labels = np.full((50,1), 1)

    group_2 = np.random.randint(0, 49, size=(50,2))
    group_2_labels = np.full((50,1), -1)

    #add a bias value of -1
    bias = np.full((50,1), -1)

    #add labels, upper right quadrant are 1, lower left are -1
    group_1_with_bias = np.hstack((group_1, bias))
    group_2_with_bias = np.hstack((group_2, bias))

    group_1_labeled = np.hstack((group_1_with_bias, group_1_labels))
    group_2_labeled = np.hstack((group_2_with_bias, group_2_labels))

    #merge our labeled data and shuffle!
    merged_data = np.vstack((group_1_labeled, group_2_labeled))
    np.random.shuffle(merged_data)

    return merged_data

data = generate_data()

#keep the two features plus the -1 bias column (added in generate_data); drop the labels
X = data[:, :3]

#create labels vector
l = np.ravel(data[:, 3:])


def perceptron_sgd(X, l, c, epochs):
    #initialize weights
    w = np.zeros(3)

    errors = []
    for epoch in range(epochs):
        total_error = 0
        for i, x in enumerate(X):
            if (np.dot(x, w) * l[i]) <= 0:
                total_error += (np.dot(x, w) * l[i])
                w = w + c * (x * l[i])

        errors.append(total_error * -1)
        print "epoch " + str(epoch) + ": " + str(w)
    return w, errors

def classify(X, l, w):
    z = np.dot(X, w)
    print z

    z[z <= 0] = -1
    z[z > 0] = 1
    #return an array of predicted labels
    return z

w, errors = perceptron_sgd(X, l, .001, 36)

# X - the data as a 2-dimensional np.array
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, .2), np.arange(y_min, y_max, .2))

# here "model" is your model's prediction (classification) function
Z = classify(np.c_[xx.ravel(), yy.ravel()], l, w[:-1]) #strip the bias from weights

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
plt.axis('off')

#Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=l, cmap=plt.cm.Paired)
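A likely culprit, for anyone hitting the same symptom: the grid points passed to classify have no bias column, and w[:-1] throws away the bias weight that was learned for the -1 bias input. The remaining function w0*x + w1*y can easily keep one sign over almost the whole plotted range, which contourf renders as a single-color background. A minimal sketch of a call that keeps the bias, assuming the -1 bias convention from generate_data:

#sketch: append the same -1 bias input to every grid point and keep the full w
grid = np.c_[xx.ravel(), yy.ravel(), -np.ones(xx.ravel().shape)]
Z = classify(grid, l, w)

The accepted answer below takes a different route: standardize the inputs and handle the bias inside classify.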

Solution

  • I got it to work.

    Standardize your X

    from sklearn import preprocessing
    scaler = preprocessing.StandardScaler().fit(X[:, :-1])
    X_trans = np.column_stack((scaler.transform(X[:, :-1]), X[:, -1]))
    

    Better initialization than zero.

    #initialize weights
    r = np.sqrt(2)
    w = np.random.uniform(-r, r, (3,))
    

    Add the learned bias during prediction

    z = np.dot(X, w[:-1]) - w[-1] #the bias input was -1 during training, so subtract the bias weight
    

    Standardize during prediction as well (using the scaler fitted on the training input)

    Z = classify(scaler.transform(np.c_[xx.ravel(), yy.ravel()]), 
        l, w) #pass the full w; classify splits off the bias internally
    

    Generally, it's always a good idea to standardize the inputs (see the quick sanity check after the full code below).

    [Image: final prediction boundary]

    Entire code:

    import numpy as np
    from matplotlib import pyplot as plt
    %matplotlib inline
    
    def generate_data():
        #generate a dataset that is linearly separable
        group_1 = np.random.randint(50, 100, size=(50,2))
        group_1_labels = np.full((50,1), 1)
    
        group_2 = np.random.randint(0, 49, size=(50,2))
        group_2_labels = np.full((50,1), -1)
    
        #add a bias value of -1
        bias = np.full((50,1), -1)
    
        #add labels, upper right quadrant are 1, lower left are -1
        group_1_with_bias = np.hstack((group_1, bias))
        group_2_with_bias = np.hstack((group_2, bias))
    
        group_1_labeled = np.hstack((group_1_with_bias, group_1_labels))
        group_2_labeled = np.hstack((group_2_with_bias, group_2_labels))
    
        #merge our labeled data and shuffle!
        merged_data = np.vstack((group_1_labeled, group_2_labeled))
        np.random.shuffle(merged_data)
    
        return merged_data
    
    data = generate_data()
    
    #keep the two features plus the -1 bias column (added in generate_data); drop the labels
    X = data[:, :3]
    
    #create labels vector
    l = np.ravel(data[:, 3:])
    
    from sklearn import preprocessing
    scaler = preprocessing.StandardScaler().fit(X[:, :-1])
    X_trans = np.column_stack((scaler.transform(X[:, :-1]), X[:, -1]))
    
    
    def perceptron_sgd(X, l, c, epochs):
        #initialize weights
        r = np.sqrt(2)
        w = np.random.uniform(-r, r, (3,))
    
        errors = []
        for epoch in range(epochs):
            total_error = 0
            for i, x in enumerate(X):
                if (np.dot(x, w) * l[i]) <= 0:
                    total_error += (np.dot(x, w) * l[i])
                    w = w + c * (x * l[i])
    
            errors.append(total_error * -1)
            print("epoch " + str(epoch) + ": " + str(w))
        return w, errors
    
    def classify(X, l, w):
        z = np.dot(X, w[:-1]) - w[-1] #the bias input was -1 during training, so subtract the bias weight
        print(z)
    
        z[z <= 0] = -1
        z[z > 0] = 1
        #return an array of predicted labels
        return z
    
    w, errors = perceptron_sgd(X_trans, l, .01, 25)
    
    # X - the data as a 2-dimensional np.array
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, .1), np.arange(y_min, y_max, .1))
    
    # here "model" is your model's prediction (classification) function
    Z = classify(scaler.transform(np.c_[xx.ravel(), yy.ravel()]), l, w) #pass the full w; classify splits off the bias internally
    
    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, alpha=0.4)
    #plt.axis('off')
    
    #Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=l, cmap=plt.cm.Paired)
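    As a quick sanity check of the whole pipeline (a sketch, reusing the scaler, classify, w, and l defined above), the transformed features should have roughly zero mean and unit variance, and the learned weights should classify the training points correctly:

    #sketch: the scaler should give ~0 mean and ~1 std per feature
    X_check = scaler.transform(X[:, :-1])
    print(X_check.mean(axis=0), X_check.std(axis=0))

    #sketch: training accuracy with the learned weights
    preds = classify(scaler.transform(X[:, :-1]), l, w)
    print("training accuracy: " + str(np.mean(preds == l)))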