Computes the gradient for linear regression
Args:
X (ndarray (m,n)): Data, m examples with n features
y (ndarray (m,)) : target values
w (ndarray (n,)) : model parameters
b (scalar) : model parameter
Returns (as the tuple (dj_db, dj_dw), in that order):
dj_db (scalar): The gradient of the cost w.r.t. the parameter b.
dj_dw (ndarray (n,)): The gradient of the cost w.r.t. the parameters w.
import numpy as np
def gradient(X, y, w, b):
    """Compute the gradient for linear regression.

    For each example the prediction error is ``X[i] . w + b - y[i]``; the
    gradients are the averages over all examples of ``error * X[i]`` (for w)
    and of ``error`` (for b).

    Args:
        X (array-like (m,n)): Data, m examples with n features. NumPy arrays
            and pandas DataFrames are both accepted.
        y (array-like (m,)): Target values (ndarray, list or pandas Series).
        w (array-like (n,)): Model parameters.
        b (scalar): Model parameter.

    Returns:
        tuple: ``(dj_db, dj_dw)`` -- note the order, b first.
            dj_db (scalar): Gradient of the cost w.r.t. the parameter b.
            dj_dw (ndarray (n,)): Gradient of the cost w.r.t. the parameters w.

    Raises:
        ValueError: If X is not 2-D or contains no examples (m == 0).
    """
    # Convert to plain ndarrays so that all indexing is positional. A pandas
    # Series looks up y[i] by index *label*, which raises KeyError when the
    # index is not exactly 0..m-1 (e.g. after shuffling or filtering rows).
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)

    m, _ = X.shape  # (number of examples, number of features)
    if m == 0:
        raise ValueError("X must contain at least one example")

    # Prediction error for every example at once, shape (m,).
    err = X @ w + b - y

    # Averaging err * X[:, j] over the examples is a single matrix product.
    dj_dw = X.T @ err / m
    dj_db = err.sum() / m
    return dj_db, dj_dw
# Parameter values at which the gradient is evaluated.
b_init = 785.1811367994083
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618, -33.2342342])

# Pass plain NumPy arrays: gradient() indexes rows by position (X[i], y[i]),
# whereas a pandas Series/DataFrame resolves y[i] by index label and raises
# KeyError when that label is absent. np.asarray is a no-op for ndarrays.
tmp_dj_db, tmp_dj_dw = gradient(np.asarray(X_train), np.asarray(y_train), w_init, b_init)
print(f'dj_db at initial w,b: {tmp_dj_db}')
print(f'dj_dw at initial w,b: \n {tmp_dj_dw}')
This is the exact error I'm getting
KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
6 frames
/usr/local/lib/python3.7/dist-packages/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
/usr/local/lib/python3.7/dist-packages/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-73-1f87825d3f02> in <module>
1 b_init = 785.1811367994083
2 w_init = np.array([ 0.39133535, 18.75376741, -53.36032453, -26.42131618,-33.2342342])
----> 3 tmp_dj_db, tmp_dj_dw = gradient(X_train, y_train, w_init, b_init)
4 print(f'dj_db at initial w,b: {tmp_dj_db}')
5 print(f'dj_dw at initial w,b: \n {tmp_dj_dw}')
<ipython-input-72-52811c00c1ad> in gradient(X, y, w, b)
16 dj_db = 0
17 for i in range(m):
---> 18 err = (np.dot(X.iloc[i,],w) + b) - y[i]
19 for j in range(n):
20 dj_dw[j] = dj_dw[j] + err * X.iloc[i, j]
/usr/local/lib/python3.7/dist-packages/pandas/core/series.py in __getitem__(self, key)
940
941 elif key_is_scalar:
--> 942 return self._get_value(key)
943
944 if is_hashable(key):
/usr/local/lib/python3.7/dist-packages/pandas/core/series.py in _get_value(self, label, takeable)
1049
1050 # Similar to Index.get_value, but we do not fall back to positional
-> 1051 loc = self.index.get_loc(label)
1052 return self.index._get_values_for_loc(self, loc, label)
1053
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 1
This is the dataset (it's a Drive link):
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Dataset
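As I suspected, the issue was that you were indexing pandas objects as if they were NumPy arrays. On a pandas Series, `y[i]` looks the value up by index label rather than by position, so it raises `KeyError: 1` when no row is labelled 1. Converting the inputs to NumPy arrays first (with `.to_numpy()`) makes the indexing positional. This should work: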
import numpy as np
import pandas as pd
def gradient(X, y, w, b):
    """Compute the gradient for linear regression.

    The per-example error is ``X[i] . w + b - y[i]``. The gradient w.r.t. w
    is the mean over examples of ``error * X[i]``; the gradient w.r.t. b is
    the mean error.

    Args:
        X (array-like (m,n)): Data, m examples with n features (ndarray or
            pandas DataFrame).
        y (array-like (m,)): Target values (ndarray, list or pandas Series).
        w (array-like (n,)): Model parameters.
        b (scalar): Model parameter.

    Returns:
        tuple: ``(dj_db, dj_dw)`` in that order.
            dj_db (scalar): Gradient of the cost w.r.t. the parameter b.
            dj_dw (ndarray (n,)): Gradient of the cost w.r.t. the parameters w.

    Raises:
        ValueError: If X is not 2-D or has zero rows.
    """
    # Normalise inputs to ndarrays so the function does not depend on the
    # caller remembering .to_numpy(): pandas objects index by label, not by
    # position, which is what the arithmetic below requires.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)

    m, _ = X.shape  # (number of examples, number of features)
    if m == 0:
        raise ValueError("X must contain at least one example")

    # Errors for all m examples in one vectorized expression, shape (m,).
    err = X @ w + b - y

    # X.T @ err sums err[i] * X[i, j] over i for every feature j.
    dj_dw = X.T @ err / m
    dj_db = err.sum() / m
    return dj_db, dj_dw
# Parameter values at which the gradient is evaluated.
b_init = 785.1811367994083
w_init = np.array(
    [0.39133535, 18.75376741, -53.36032453, -26.42131618, -33.2342342]
)

# Load the dataset: 'Price' is the target, all remaining columns are features.
data = pd.read_csv(r'USA_Housing - USA_Housing.csv')
y_train = data['Price']
X_train = data.drop(columns=['Price'], errors='ignore')

# gradient() indexes by position, so hand it NumPy arrays, not pandas objects.
tmp_dj_db, tmp_dj_dw = gradient(
    X_train.to_numpy(), y_train.to_numpy(), w_init, b_init
)
print(f'dj_db at initial w,b: {tmp_dj_db}')
print(f'dj_dw at initial w,b: \n {tmp_dj_dw}')