python scipy neural-network minimization

Scipy.fmin_bfgs passing to many arguments to function

I am trying to program a neural network and was trying to minimize the cost function using scipy.optimize_bfgs() and after attempting to use this I get the error that "TypeError: cost() takes 3 positional arguments but 4 were given". Where are these four arguments coming from and how can I rectify this? The cost function is defined by:

def cost(param,X,y):
    Theta1 = np.reshape(param[0:106950:1],(75,1426))
    Theta2 = np.reshape(param[106950:112650:1],(75,76))
    Theta3 = np.reshape(param[112650::1],(1,76))
    m = len(X)
    J = 0
    a1 = X
    z2 = np.dot(a1,np.transpose(Theta1))
    a2 = sigmoid(z2)
    a2 = np.concatenate((np.ones((len(a2),1)),a2),axis=1)
    z3 = np.dot(a2,Theta2.T)
    a3 = sigmoid(z3)
    a3 = np.concatenate((np.ones((len(a3),1)),a3),axis=1)
    z4 = np.dot(a3,Theta3.T)
    a4 = sigmoid(z4)
    h = a4
    ##Calculate cost
    J = np.sum(np.sum(np.multiply(-y,np.log(h)) - np.multiply((1-y),np.log(1-h))))/(2*m)
    theta1_reg[:,0] = 0
    theta2_reg[:,0] = 0
    theta3_reg[:,0] = 0
    Reg = (lamb/(2*m))*(np.sum(np.sum(np.square(theta1_reg)))+np.sum(np.sum(np.sqaure(theta2_reg)))+np.sum(np.sum(np.square(theta3_reg))))
    J = J + Reg
    return J

The gradient is then calculated with:

def grad(param,X,y):
    Theta1 = np.reshape(param[0:106950:1],(75,1426))
    Theta2 = np.reshape(param[106950:112650:1],(75,76))
    Theta3 = np.reshape(param[112650::1],(1,76))
    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)
    Theta3_grad = np.zeros(Theta3.shape)
    m = len(X)
    ##Forward propogation
    a1 = X
    z2 = np.dot(a1,np.transpose(Theta1))
    a2 = sigmoid(z2)
    a2 = np.concatenate((np.ones((len(a2),1)),a2),axis=1)
    z3 = np.dot(a2,Theta2.T)
    a3 = sigmoid(z3)
    a3 = np.concatenate((np.ones((len(a3),1)),a3),axis=1)
    z4 = np.dot(a3,Theta3.T)
    a4 = sigmoid(z4)
    h = a4  
    ##Backward propogation  
    d4 = a4 - y
    d3 = np.multiply(np.dot(d4,Theta3[:,1:]),sigmoidGradient(z3))
    d2 = np.multiply(np.dot(d3,Theta2[:,1:]),sigmoidGradient(z2)) ## or sigmoid(z2) .* ( 1 - sigmoid(z2))
    D1 = np.dot(d2.T,a1)
    D2 = np.dot(d3.T,a2)
    D3 = np.dot(d4.T,a3)
    ##Unregularized gradients
    Theta1_grad = (1/m)*D1 
    Theta2_grad = (1/m)*D2
    Theta3_grad = (1/m)*D3
    ##Regularize gradients
    theta1_reg = Theta1
    theta2_reg = Theta2
    theta3_reg = Theta3
    theta1_reg[:,0] = 0
    theta2_reg[:,0] = 0
    theta3_reg[:,0] = 0
    theta1_reg = (lamb/m)*theta1_reg
    theta2_reg = (lamb/m)*theta2_reg
    theta3_reg = (lamb/m)*theta3_reg
    Theta1_grad = Theta1_grad + theta1_reg
    Theta2_grad = Theta2_grad + theta2_reg
    Theta3_grad = Theta3_grad + theta3_reg
    ##Concatenate gradients
    grad = np.concatenate((Theta1_grad,Theta2_grad,Theta3_grad),axis=None)
    return grad

Other functions defined are

def sigmoid(z):
    sig = 1 / (1 + np.exp(z))
    return sig

def randInitializeWeights(l_in, l_out):
    epsilon = 0.12;
    W = np.random.rand(l_out, 1+l_in)*2*epsilon - epsilon;
    return W
def sigmoidGradient(z):
    g = np.multiply(sigmoid(z),(1-sigmoid(z)))
    return g

As an example:

import numpy as np
import scipy.optimize
X = np.random.rand(479,1426)
y1 = np.zeros((frames,1))
y2 = np.ones((framesp,1)) 
y = np.concatenate((y1,y2),axis=0)
init_param = np.random.rand(112726,)
lamb = 0.5
scipy.optimize.fmin_bfgs(cost,fprime=grad,x0=init_param,args=(param,X,y))

Then the error appears. Thanks for any help

Solution

The arguments passed into the cost functions are the parameters, followed by the extra arguments. The parameters are chosen by the minimization function, the extra arguments are passed through.

When calling fmin_bfgs, only pass the extra arguments as args, not the actual parameters to optimize:

scipy.optimize.fmin_bfgs(..., args=(X,y))