I am getting a value error for parameters (not enough to unpack expected 2 got 1) I have a network I want to train:
def build(self):
numpy.random.seed(self.seed)
self.estimators.append(('standardize', StandardScaler))
self.estimators.append(('mlp', KerasClassifier(build_fn=self.build_fn, epochs=50, batch_size=5, verbose=0)))
self.pipeline = Pipeline(self.estimators)
Now if I want to fit the data to some values: say self.X, self.Y
self.model = self.pipeline.fit(self.X, self.Y, verbose=1)
I get
Traceback (most recent call last):
File "C:/Users/jaehan/PycharmProjects/cerebro/cerebro.py", line 257, in
<module>
model.run()
File "C:/Users/jaehan/PycharmProjects/cerebro/cerebro.py", line 138, in run
self.model = self.pipeline.fit(self.X, self.Y, verbose=1)
File "C:\Users\jaehan\AppData\Local\Continuum\anaconda3\envs\py36\lib\site-
packages\sklearn\pipeline.py", line 248, in fit
Xt, fit_params = self._fit(X, y, **fit_params)
File "C:\Users\jaehan\AppData\Local\Continuum\anaconda3\envs\py36\lib\site-
packages\sklearn\pipeline.py", line 197, in _fit
step, param = pname.split('__', 1)
ValueError: not enough values to unpack (expected 2, got 1)
Am I doing something wrong here? I was under the impression I could just run a fit and it would return a history object, which I could save and load at any time
I even tried...
self.pipeline.fit(self.X, self.Y)
Which throws...
AttributeError: 'numpy.ndarray' object has no attribute 'fit'
I have no idea what is going on here.
Full Code
class Cerebro:
def __init__(self):
self.model = None
self.build_fn = None
self.data = None
self.X = None
self.Y = None
#these three are for encoding string values to integer_encodings / one hot encodings
self.encoder = LabelEncoder()
self.encodings = {}
self.one_hot_encodings = {}
self.seed = numpy.random.seed(7) #this is to ensure we have reproducible results.
self.estimators = []
self.pipeline = None
self.kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=self.seed)
self.cross_validation_score = 0.0
def preprocess(self):
"""
This method will preprocess the dataset we want to train our network on.
Example:
import preproccessing
...
dataset, X, Y = preprocessing.main()
"""
self.data = pandas.read_csv('src_examples/hwtxn_final_for_influx.txt', sep='\t').values
self.X = numpy.delete(self.data, 13, axis=1)
self.Y = self.data[:, 13].astype(numpy.float16)
def build(self):
self.build_fn = self.base_model()
self.preprocess()
numpy.random.seed(self.seed)
self.estimators.append(('standardize', StandardScaler()))
self.estimators.append(('mlp', KerasClassifier(build_fn=self.build_fn, epochs=50, batch_size=5, verbose=0)))
self.pipeline = Pipeline(self.estimators)
def run(self):
"""This will actually take the pipeline (preprocessing standardization, model)
and fit it to our dataset (X, Y) (We don't need test/train since we are using stratified k fold cross val.)
Args:
None
Returns:
None
"""
# this is the 'model'
# self.pipeline
print(type(self.pipeline))
print(self.X.shape)
self.model = self.pipeline.fit(self.X, self.Y)
def load(self, fn):
"""This will load a saved model (history object)
Args:
fn (filename): represents saved model file
Returns:
model (pkl object): represents model
"""
return pickle.load(open(fn, 'rb'))
def save(self, fn):
"""This will save a model (history object)
Args:
fn (filename): represents a filename to save the model as
Returns:
None
"""
pickle.dump(self.model, open(fn, 'wb'))
def encode(self, vals, key):
""" This method will encode a list of values and take a key (representing column name, or index) to save
in the class object (self.encodings)
This will help us keep track of encodings we have for values we need to translate/decipher.
Args:
vals(np.array): array of values to encode
key(str): str representing the key used to encode this particular set of values
Returns:
transformed values (np.array) representing the encoded versions of values
"""
# int encoding for non int values
self.encodings[key] = self.encoder.fit_transform(vals)
return self.encoder.fit_transform(vals)
def decoder(self, vals, key):
"""This method will decode the integer_encodings for class variables. It will take vals which
represents a list of values to decode (i.e. [1,2,3] -- [apple, pear, orange])
It will also take a key (since every decoding has a corresponding encoding) to find which encoding
scheme to map to
Args:
vals(np.array) : array of values to decode
key(str) : string representing the key used for encoding the values (for decoding it)
Returns:
inverse transform of encoded values (np.array)
"""
# translate int encodings to original values (encoder._classes)
return self.encodings[key].inverse_transform(vals)
def cross_validate(self):
"""
This will perform a cross validation score using a stratified kfold method. (Think traditional Kfold but
with the values evenly distributed for each subsample)
Args:
None
Returns:
None
"""
self.cross_validation_score = cross_val_score(self.pipeline, self.X, self.Y, cv=self.kfold)
return self.cross_validation_score
@staticmethod
def base_model():
"""
This will return a base model for us to try. The good thing about this implementation is that
when we decide we want something more complex then all we have to do is define a class function and replace
the values in the build f(x)
Args:
None
Returns:
model (keras.models.Sequential): Keras based DNN Model
"""
# create model
model = Sequential()
model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
@staticmethod
def one_hot_encoder(int_encoding):
"""
This will take an integer encoding of string variables (traditional preprocessing step, will probably
move this to the preprocessing package.
Essential it returns a binary 'one hot' encoding of the values we wish to encode
Example
#Dataset Values
[apple, orange, pear]
#Integer Encoding
[1, 2, 3]
#One Hot Encoding
[[1, 0, 0]
[0, 1, 0]
[0, 0, 1]]
Args:
None
Returns:
Matrix (np.array): matrix representing one hot vectors for a class of values
"""
# we might not need this... so for now we will keep it static
return OneHotEncoder(sparse=False).fit_transform(int_encoding.reshape(len(int_encoding), 1))
if __name__ == '__main__':
# Step 1 is to initialize class (with seed == 7)
model = Cerebro()
model.build()
model.cross_validate()
print("Here are our estimators:\n {}".format(model.estimators))
print("Here is our pipeline:\n {}".format(model.pipeline))
model.run()
EDIT The answer is that .fit() build_fn argument requires a function pointer and not the model itself.
IMHO I feel an error should be thrown for specifically that case.
This is due to the following line:
self.build_fn = self.base_model()
This should actually be:
self.build_fn = self.base_model
KerasClassifier requires a pointer to the function which creates the model, but by appending ()
at the end, you are assigning build_fn
with the actual model, which is wrong.
Now in addition to above error, I would recommend checking the following lines in your code, which if not corrected will give error in future when you will use the code.
1) self.encodings[key] = self.encoder.fit_transform(vals)
Here you are assigning the transformed data to the encodings[key]
not the model. So when you do this:-
self.encodings[key].inverse_transform(vals)
It makes no sense to call inverse_transform()
on the transformed data.
inverse_transform()
is a method of scikit-learn transformers. But self.encodings[key]
will give out a ndarray, because you have saved the output array from fit_transform()
.
2) Something similar to 2 is also happening with one_hot_encoder()
The error "AttributeError: 'numpy.ndarray' object has no attribute 'fit'"
seems related to 1 and 2.