I am trying to do the following code in python numpy:
def log_loss(X, y, w, b=0): '''
Input:
X: data matrix of shape nxd
y: n-dimensional vector of labels (+1 or -1)
w: d-dimensional vector
b: scalar (optional, default is 0)
Output:
scalar
'''
assert np.sum(np.abs(y)) == len(y) # check if all labels in y are either +1 or -1 wt = w.T
n,d = X.shape
y_pred = np.zeros(n)
# I want to somehow not use this for loop here
for i in range(n):
y_pred[i] = np.log( sigmoid( y[i]*( wt@X[i]+b )))
return np.negative(np.sum(y_pred))
#########################################
def sigmoid(z): '''
Calculates the sigmoid of z.
Input:
z: scalar or array of dimension n
Output:
scalar or array of dimension n
'''
sig = 1/(1+np.exp(-z))
return sig
My Question is how can I do this more efficiently without using the tight loops? or using a more efficient solution? I think my solution ignores the main point of using numpy. Please advise.
def log_loss(X, y, w, b=0):
'''
Input:
X: data matrix of shape nxd
y: n-dimensional vector of labels (+1 or -1)
w: d-dimensional vector
b: scalar (optional, default is 0)
Output:
scalar
'''
assert np.sum(np.abs(y)) == len(y)
wt = w.T
n,d = X.shape
linear_pred = X.dot(wt) + b
prob_pred = sigmoid(linear_pred)
log_loss = np.mean(-y*np.log(prob_pred) - (1-y)*np.log(1-prob_pred))
return log_loss