I'm a newbie in neural networks and I'm trying to do MLP text classification using Keras. Every time I run the code I get a different val loss and val accuracy: val loss increases and val accuracy decreases each time I re-run it. The code I'm using is like this:
#Split data training and testing (80:20)
Train_X2, Test_X2, Train_Y2, Test_Y2 = model_selection.train_test_split(dataset['review'],dataset['sentiment'],test_size=0.2, random_state=1)
Encoder = LabelEncoder()
Train_Y2 = Encoder.fit_transform(Train_Y2)
Test_Y2 = Encoder.transform(Test_Y2)  # transform only, so test labels reuse the training encoding
Tfidf_vect2 = TfidfVectorizer(max_features=None)
Tfidf_vect2.fit(dataset['review'])
Train_X2_Tfidf = Tfidf_vect2.transform(Train_X2)
Test_X2_Tfidf = Tfidf_vect2.transform(Test_X2)
#Model
model = Sequential()
model.add(Dense(100, input_dim=1148, activation='sigmoid'))
model.add(Dense(1, activation='sigmoid'))
opt = Adam(learning_rate=0.01)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
model.summary()
from keras.backend import clear_session
clear_session()
es = EarlyStopping(monitor="val_loss",mode='min',patience=10)
# Convert the sparse TF-IDF matrices to dense arrays for Keras
arr_Train_X2_Tfidf = Train_X2_Tfidf.toarray()
arr_Test_X2_Tfidf = Test_X2_Tfidf.toarray()
history = model.fit(arr_Train_X2_Tfidf, Train_Y2, epochs=100, verbose=1,
                    validation_data=(arr_Test_X2_Tfidf, Test_Y2), batch_size=32, callbacks=[es])
I tried using clear_session() to make the model not start off with the computed weights from the previous training, but I still get different values. How do I fix it? Thank you.
How can I get constant val accuracy and val loss in Keras?
I guess what you want is reproducible training runs. For that you will have to seed the random number generators. Getting reproducible results with a seed is tricky on the GPU because some GPU operations are non-deterministic; however, with the model architecture you are using, that is not a problem.
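As a side note, newer TensorFlow releases wrap the individual seeding calls into a single utility and can also force deterministic GPU kernels. A minimal sketch, assuming TensorFlow >= 2.9 (these two calls are an addition of mine, not part of the code below):

import tensorflow as tf

tf.keras.utils.set_random_seed(123)             # seeds Python's random, NumPy and TensorFlow in one call
tf.config.experimental.enable_op_determinism()  # deterministic GPU kernels where supported (may run slower)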
make the model not start off with the computed weights from the previous training.
That is not the problem here: you are creating the model from scratch every time, and the Dense layers you are using get initialised from the glorot_uniform distribution.
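If you only need the initial weights themselves to be reproducible, you can also pin the initializer seed per layer. A minimal sketch, assuming tf.keras; the seed value 42 is arbitrary and not from the original code:

from tensorflow.keras import layers, initializers

# This layer's initial weights are identical on every run,
# independent of any global seed.
dense = layers.Dense(100, activation='sigmoid',
                     kernel_initializer=initializers.GlorotUniform(seed=42))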
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras import callbacks
import matplotlib.pyplot as plt
import os
import numpy as np
import random as python_random
def seed():
    # Run on CPU only, since some GPU ops are non-deterministic
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
    # Seed every random number generator involved
    np.random.seed(123)
    python_random.seed(123)
    tf.random.set_seed(1234)
def train(set_seed):
    if set_seed:
        seed()
    # Toy dataset so the example is self-contained
    dataset = {
        'review': [
            'This is the first document.',
            'This document is the second document.',
            'And this is the third one.',
            'Is this the first document?',
        ],
        'sentiment': [0, 1, 0, 1]
    }
    Train_X2, Test_X2, Train_Y2, Test_Y2 = train_test_split(
        dataset['review'], dataset['sentiment'], test_size=0.2, random_state=1)
    Encoder = LabelEncoder()
    Train_Y2 = Encoder.fit_transform(Train_Y2)
    Test_Y2 = Encoder.transform(Test_Y2)
    Tfidf_vect2 = TfidfVectorizer(max_features=None)
    Tfidf_vect2.fit(dataset['review'])
    Train_X2_Tfidf = Tfidf_vect2.transform(Train_X2).toarray()
    Test_X2_Tfidf = Tfidf_vect2.transform(Test_X2).toarray()
    # Model: input_dim=9 matches the vocabulary size of the toy corpus
    model = keras.Sequential()
    model.add(layers.Dense(100, input_dim=9, activation='sigmoid'))
    model.add(layers.Dense(1, activation='sigmoid'))
    opt = optimizers.Adam(learning_rate=0.01)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    #model.summary()
    history = model.fit(Train_X2_Tfidf, Train_Y2, epochs=10, verbose=0,
                        validation_data=(Test_X2_Tfidf, Test_Y2), batch_size=32)
    return history
def run(set_seed=False):
    plt.figure(figsize=(7, 7))
    for i in range(5):
        history = train(set_seed)
        plt.plot(history.history['val_loss'], label=f"{i+1}")
    plt.legend()
    plt.title("With Seed" if set_seed else "Without Seed")

run()
run(True)
plt.show()
Output:
[Two val_loss-per-epoch plots, one per run() call: "Without Seed" shows five different curves, "With Seed" shows five identical curves.]
You can see how the val_loss differs between runs without a seed (as it depends on the initial values of the Dense layers and on the other places where random number generation is used), and how the val_loss is exactly the same with a seed, which makes sure the initial values of the Dense layers (and the values at the other places where random number generation is used) are the same between runs.