python, machine-learning, logistic-regression

Regularized Logistic Regression in Python


The code implements regularized logistic regression and works fine until the last line, where I call fmin_bfgs. It was originally written in Octave, so I tested each function with some values before using fmin_bfgs, and all the outputs were correct. The problem is that when I try to minimize cost_function_reg I get the following message:

 line 53, in cost_function_reg
    thetaR = theta[1:, 0]
IndexError: too many indices for array

Code:

import numpy as np
from scipy.optimize import fmin_bfgs
from math import e

#import data
data = np.loadtxt(r'D:\ml\ex2data2.txt', delimiter=',')

X = data[:, 0:2]
y = data[:, 2:3]

#   map_feature(x1, x2) maps the two input features
#   to quadratic features used in the regularization exercise.
#
#   Returns a new feature array with more features, comprising of 
#   x1, x2, x1.^2, x2.^2, x1*x2, x1*x2.^2, etc.
# Note that mapFeature also adds a column of ones for us, so the intercept
# term is handled
def map_feature(x1, x2):
   
    x1.shape = (x1.size, 1)
    x2.shape = (x2.size, 1)
    degree = 6
    out = np.ones(shape=(x1[:, 0].size, 1))

   

    for i in range(1, degree + 1 ):
        for j in range(i + 1):
            r = (x1 ** (i - j)) * (x2 ** j)
            out = np.append(out, r, axis=1)

    return out
    

# Compute sigmoid function
def sigmoid(X):

    sigmoid = 1.0 / (1.0 + e ** (-1.0 * X))

    return sigmoid


#   Compute cost and gradient for logistic regression with regularization 
def cost_function_reg(theta, X, y, l):
    

    h = sigmoid(X.dot(theta))
    m, n = X.shape
    thetaR = theta[1:, 0]
    

    epsilon = 1e-5

    J = (1/m) * ((-y.T @ np.log(h+epsilon)) - (1-y).T @ np.log(1-h+epsilon)) + (l / (2 * m)) * (thetaR.T @ thetaR)
    
    delta = h - y
    grad1 = (1./m)*(delta.T.dot((X[:, 0:1])))
    
    
    XR = X[:, 1:]
    sumdelta = delta.T.dot(XR)
    grad2 = (1.0 / m) * (sumdelta + l * thetaR)
    grad2 = grad2.T
    
    
    grad = np.zeros(shape=(grad2.shape[0]+1, grad2.shape[1]))
    

    grad[0]=grad1
    grad[1:]=grad2
    
    
    return J.flatten(), grad.flatten()


mf = map_feature(X[:, 0], X[:, 1])

#Initialize theta parameters
initial_theta = np.zeros(shape=(mf.shape[1], 1))


#Set regularization parameter lambda to 1
l = 1

# Compute and display initial cost and gradient for regularized logistic
# regression
cost, grad = cost_function_reg(initial_theta, mf, y, l)
print('Cost at inicial theta: ', cost)
print('\nGrad at inicial theta: ', grad)


def decorated_cost(theta):
    return cost_function_reg(theta, mf, y, l)

fmin_bfgs(decorated_cost, initial_theta, maxiter=400)

Data that I used:

0.051267,0.69956,1
-0.092742,0.68494,1
-0.21371,0.69225,1
-0.375,0.50219,1
-0.51325,0.46564,1
-0.52477,0.2098,1
-0.39804,0.034357,1
-0.30588,-0.19225,1
0.016705,-0.40424,1
0.13191,-0.51389,1
0.38537,-0.56506,1
0.52938,-0.5212,1
0.63882,-0.24342,1
0.73675,-0.18494,1
0.54666,0.48757,1
0.322,0.5826,1
0.16647,0.53874,1
-0.046659,0.81652,1
-0.17339,0.69956,1
-0.47869,0.63377,1
-0.60541,0.59722,1
-0.62846,0.33406,1
-0.59389,0.005117,1
-0.42108,-0.27266,1
-0.11578,-0.39693,1
0.20104,-0.60161,1
0.46601,-0.53582,1
0.67339,-0.53582,1
-0.13882,0.54605,1
-0.29435,0.77997,1
-0.26555,0.96272,1
-0.16187,0.8019,1
-0.17339,0.64839,1
-0.28283,0.47295,1
-0.36348,0.31213,1
-0.30012,0.027047,1
-0.23675,-0.21418,1
-0.06394,-0.18494,1
0.062788,-0.16301,1
0.22984,-0.41155,1
0.2932,-0.2288,1
0.48329,-0.18494,1
0.64459,-0.14108,1
0.46025,0.012427,1
0.6273,0.15863,1
0.57546,0.26827,1
0.72523,0.44371,1
0.22408,0.52412,1
0.44297,0.67032,1
0.322,0.69225,1
0.13767,0.57529,1
-0.0063364,0.39985,1
-0.092742,0.55336,1
-0.20795,0.35599,1
-0.20795,0.17325,1
-0.43836,0.21711,1
-0.21947,-0.016813,1
-0.13882,-0.27266,1
0.18376,0.93348,0
0.22408,0.77997,0
0.29896,0.61915,0
0.50634,0.75804,0
0.61578,0.7288,0
0.60426,0.59722,0
0.76555,0.50219,0
0.92684,0.3633,0
0.82316,0.27558,0
0.96141,0.085526,0
0.93836,0.012427,0
0.86348,-0.082602,0
0.89804,-0.20687,0
0.85196,-0.36769,0
0.82892,-0.5212,0
0.79435,-0.55775,0
0.59274,-0.7405,0
0.51786,-0.5943,0
0.46601,-0.41886,0
0.35081,-0.57968,0
0.28744,-0.76974,0
0.085829,-0.75512,0
0.14919,-0.57968,0
-0.13306,-0.4481,0
-0.40956,-0.41155,0
-0.39228,-0.25804,0
-0.74366,-0.25804,0
-0.69758,0.041667,0
-0.75518,0.2902,0
-0.69758,0.68494,0
-0.4038,0.70687,0
-0.38076,0.91886,0
-0.50749,0.90424,0
-0.54781,0.70687,0
0.10311,0.77997,0
0.057028,0.91886,0
-0.10426,0.99196,0
-0.081221,1.1089,0
0.28744,1.087,0
0.39689,0.82383,0
0.63882,0.88962,0
0.82316,0.66301,0
0.67339,0.64108,0
1.0709,0.10015,0
-0.046659,-0.57968,0
-0.23675,-0.63816,0
-0.15035,-0.36769,0
-0.49021,-0.3019,0
-0.46717,-0.13377,0
-0.28859,-0.060673,0
-0.61118,-0.067982,0
-0.66302,-0.21418,0
-0.59965,-0.41886,0
-0.72638,-0.082602,0
-0.83007,0.31213,0
-0.72062,0.53874,0
-0.59389,0.49488,0
-0.48445,0.99927,0
-0.0063364,0.99927,0
0.63265,-0.030612,0

I am still beginning to learn Python, so any suggestions are welcome. Thanks for your attention, and sorry for any problems; it's the first time I've asked a question here.


Solution

  • Your logic scores better than 80% accuracy, which is not shabby at all. Nicely done! I just had to make a few Pythonic edits.

    In Python I would break this up into separate functions for computing the cost and the gradient. Returning the pair [J, grad] from one function is fine in MATLAB/Octave, but SciPy's optimizers expect the objective to return a scalar, with the gradient supplied separately (or via the jac argument). Also, the optimizer passes theta to your callbacks as a flat 1-D array, which is exactly why theta[1:, 0] raises "too many indices for array"; reshape theta inside the function before indexing it. A minimal sketch of that fix is shown just below, followed by the full rework.
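
    As a minimal sketch of that first point, keeping your cost_function_reg exactly as you wrote it, you could reshape theta inside two small wrapper functions and hand fmin_bfgs the cost and the gradient separately (cost_only and grad_only are just illustrative names, and calling the function twice per iteration is wasteful but fine for getting it running):

    def cost_only(theta_flat):
        # fmin_bfgs works with a flat 1-D theta; restore the column shape
        # that cost_function_reg expects before calling it
        J, _ = cost_function_reg(theta_flat.reshape(-1, 1), mf, y, l)
        return J[0]

    def grad_only(theta_flat):
        _, grad = cost_function_reg(theta_flat.reshape(-1, 1), mf, y, l)
        return grad

    theta_opt = fmin_bfgs(cost_only, initial_theta.flatten(),
                          fprime=grad_only, maxiter=400)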

    I took Andrew Ng's Stanford Machine Learning course on Coursera and did a similar assignment this way:

    import scipy.optimize as op
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.preprocessing import PolynomialFeatures
    
    
    def sigmoid(z):
        return 1/(1 + np.exp(-z))
    
    
    def compute_gradient(theta, X, y, l):
        m, n = X.shape
        theta = theta.reshape((n, 1))  # the optimizer passes theta as a flat array
        theta_r = theta[1:n, :]        # the intercept term is not regularized
        y = y.reshape((m, 1))
        h = sigmoid(X.dot(theta))
        non_regularized_gradient = ((np.sum(((h - y) * X), axis=0)) / m).reshape(n, 1)
        reg = np.insert((l / m) * theta_r, 0, 0, axis=0)  # prepend 0 for the intercept
        grad = non_regularized_gradient + reg
        return grad.flatten()
    
    
    def compute_cost(theta, X, y, l):
        m, n = X.shape
        theta = theta.reshape((n, 1))  # reshape before computing h so the shapes line up
        y = y.reshape((m, 1))
        theta_r = theta[1:n, :]
        h = sigmoid(X.dot(theta))
        cost = np.sum(np.multiply(-y, np.log(h)) - np.multiply((1 - y), np.log(1 - h))) / m
        reg = (l / (2 * m)) * np.sum(np.square(theta_r))
        J = cost + reg
        return J
    
    
    def make_predictions(theta,X):
        m, n = X.shape
        return np.round(sigmoid(X.dot(theta.reshape(n, 1))))
    
    
    data = np.loadtxt("ex2data2.txt", delimiter=",")  # the data file has no header row, so nothing is skipped
    nr, nc = data.shape
    X = data[:, 0:nc - 1]
    y = data[:, [nc - 1]]
    pos = np.where(y.ravel() == 1)[0]
    neg = np.where(y.ravel() == 0)[0]
    plt.scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
    plt.scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
    plt.xlabel('Equipment Test 1')
    plt.ylabel('Equipment Test 2')
    plt.legend(['Nominal', 'Adverse'])
    plt.show()
    storeX = X
    # PolynomialFeatures(6) builds every polynomial term up to degree 6,
    # including the bias column, much like map_feature in the question
    poly = PolynomialFeatures(6)
    X = poly.fit_transform(X)
    m, n = X.shape
    initial_theta = np.zeros(n)  # minimize expects a 1-D starting point
    l = 1
    
    print("Optimizing...")
    Result = op.minimize(fun=compute_cost, x0=initial_theta, args=(X, y, l), method='TNC', jac=compute_gradient)
    optimal_theta = Result.x
    print(Result.x.shape)
    print("optimal theta value")
    print(optimal_theta)
    p = make_predictions(optimal_theta, X)
    score = np.mean(np.double(p == y))
    print("Score:")
    print(score)
    

    Here's the output in PyCharm Community, Python version 3.7.3:

    [screenshot of the console output]
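
    If you would rather keep the Octave-style pattern of a single function that returns both the cost and the gradient, scipy.optimize.minimize supports that as well: pass jac=True and have fun return the pair (cost, gradient). A quick sketch reusing the functions above (cost_and_grad is just an illustrative wrapper name):

    def cost_and_grad(theta, X, y, l):
        # with jac=True, minimize expects fun to return (cost, gradient)
        return compute_cost(theta, X, y, l), compute_gradient(theta, X, y, l)

    Result = op.minimize(fun=cost_and_grad, x0=initial_theta.flatten(),
                         args=(X, y, l), method='TNC', jac=True)

    This evaluates the cost and the gradient in one call, which is closer to how fminunc is used in the Octave version of the exercise.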