I'm building a convolutional neural network and the loss function always oscillates more in the middle of training. How can I fix it?
Loss error
My dataset is only 100 images. I want to overfit to check that everything works, but the loss always shows greater oscillation in the central part of the curve. I have tried lowering the learning rate, but the result is always the same: the oscillation is always larger in the middle of the curve. I also tried putting all 100 images in the batch size, but the oscillation does not decrease. This is my code. Why does this happen, and how can I fix it?
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
train_path='/Users/David/Deskt...'
batch_size_train=100
num_epochs=1
tf.logging.set_verbosity(tf.logging.INFO)
sess=tf.Session()
#Convolutional Model
def cnn_model(features, labels, mode):
    # Input layer
    input_layer = tf.reshape(features["x"], [-1, 224, 224, 3])
    # Convolutional layer 1
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=64,
        kernel_size=[10, 10],
        padding="same",
        activation=tf.nn.relu,
        name="Convolucion_1")
    # Pooling 1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2, name="Pool_1")
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=128,
        kernel_size=[10, 10],
        padding="same",
        activation=tf.nn.relu,
        name="Convolucion_2")
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2, name="Pool_2")
    conv3 = tf.layers.conv2d(
        inputs=pool2,
        filters=192,
        kernel_size=[10, 10],
        padding="same",
        activation=tf.nn.relu,
        name="Convolucion_3")
    pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=2, name="Pool_3")
    conv4 = tf.layers.conv2d(
        inputs=pool3,
        filters=256,
        kernel_size=[10, 10],
        padding="same",
        activation=tf.nn.relu,
        name="Convolucion_4")
    pool4 = tf.layers.max_pooling2d(inputs=conv4, pool_size=[2, 2], strides=2, name="Pool_4")
    conv5 = tf.layers.conv2d(
        inputs=pool4,
        filters=320,
        kernel_size=[10, 10],
        padding="same",
        activation=tf.nn.relu,
        name="Convolucion_5")
    pool5 = tf.layers.max_pooling2d(inputs=conv5, pool_size=[2, 2], strides=2, name="Pool_5")
    pool5_flat = tf.reshape(pool5, [-1, 7 * 7 * 320], name="Flat_Pool")
    # Deep neural network (fully connected layers)
    dense = tf.layers.dense(inputs=pool5_flat, units=10000, activation=tf.nn.relu, name="Capa_1")
    dense1 = tf.layers.dense(inputs=dense, units=7000, activation=tf.nn.relu, name="Capa_2")
    dense2 = tf.layers.dense(inputs=dense1, units=4000, activation=tf.nn.relu, name="Capa_3")
    dense3 = tf.layers.dense(inputs=dense2, units=1000, activation=tf.nn.relu, name="Capa_4")
    dense4 = tf.layers.dense(inputs=dense3, units=500, activation=tf.nn.relu, name="Capa_5")
    logits = tf.layers.dense(inputs=dense4, units=2, name="Capa_final")
    onehot_labels = tf.one_hot(indices=labels, depth=2)
    t = tf.nn.softmax(logits, name="softmax_tensor")
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)
    tf.summary.scalar('loss', loss)
    ds = tf.train.SummarySaverHook(save_steps=1, output_dir="/Users/David/Desktop/David/Tesis/Practica/Programas/CNN/Model_Chekpoint", summary_op=tf.summary.merge_all())
    loss_hook = tf.train.LoggingTensorHook(tensors={"loss": loss}, every_n_iter=1)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op, training_hooks=[ds, loss_hook])
def read_file(filename_queue):
    # Function to read the tf.record file and return the next record
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    # Decode the tf.record, returning a dictionary of features
    feature = {'train/image': tf.FixedLenFeature([], tf.string),
               'train/label': tf.FixedLenFeature([], tf.int64)}
    features = tf.parse_single_example(serialized_example, features=feature, name="Decodificacion_Parse")
    # Convert the decoded string feature to numbers and scale to [0, 1]
    image = tf.decode_raw(features['train/image'], tf.float32, name="imagenes_decod") * (1 / 255.0)
    # Cast the label to int32
    label = tf.cast(features['train/label'], dtype=tf.int32, name="label_decod")
    # Reshape the image data
    image = tf.reshape(image, [224, 224, 3])
    return image, label
def input_pipeline(filenames, batch_size):
    # Create a queue from the list of input files
    filename_queue = tf.train.string_input_producer([filenames], num_epochs=1, shuffle=True, name="Creacion_lista_archiv")
    images, labels = read_file(filename_queue)
    # Shuffle the input data
    min_after_dequeue = 100
    capacity = min_after_dequeue + 3 * batch_size
    images, labels = tf.train.shuffle_batch([images, labels], batch_size=batch_size, capacity=capacity, num_threads=2, min_after_dequeue=min_after_dequeue, name="Shuffle_data_in")
    return images, labels
def main(unused_argv):
    # Read and decode the data
    img_train, lbl_train = input_pipeline(train_path, batch_size_train)
    # Estimator - model
    gun_detector = tf.estimator.Estimator(model_fn=cnn_model, model_dir="/Users/David/Desktop/David/Tesis/Practica/Programas/CNN/Model_Chekpoint")
    # Initialize variables and run the session
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init_op)
    # Run the queues that were created in the computational graph
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            img, lbl = sess.run([img_train, lbl_train])
            train_input_fn = tf.estimator.inputs.numpy_input_fn(
                x={"x": img},
                y=lbl,
                batch_size=70,
                num_epochs=None,
                shuffle=True)
            gun_detector.train(
                input_fn=train_input_fn,
                steps=5000)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()
        coord.join(threads)
        sess.close()
if __name__ == '__main__':
    tf.app.run()
The oscillations occur because of the learning rate. If you learn too fast, you will skip over the local minima and your loss function will diverge. If you make your learning rate too small, you will never converge, or converge very slowly. You can try fiddling with the learning rate to make those oscillations go away, but then you run the risk of overtraining your model. Your graph looks fine to me; as long as you converge in a reasonable amount of time, you shouldn't care what happens in the middle.
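One thing you could try, if you want to damp the oscillations without giving up a larger step size early on, is to decay the learning rate as training progresses instead of keeping it fixed at 0.001. Below is a minimal sketch of how the TRAIN branch of your cnn_model could look using tf.train.exponential_decay; the decay_steps and decay_rate values are placeholders I picked for illustration, so you would need to tune them for your data:

if mode == tf.estimator.ModeKeys.TRAIN:
    global_step = tf.train.get_global_step()
    # Start at 0.001 and halve the learning rate every 1000 steps
    # (both numbers are arbitrary placeholders; tune them for your data).
    learning_rate = tf.train.exponential_decay(
        learning_rate=0.001,
        global_step=global_step,
        decay_steps=1000,
        decay_rate=0.5,
        staircase=True)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss=loss, global_step=global_step)
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op,
                                      training_hooks=[ds, loss_hook])

Because the decayed rate is a tensor that depends on the global step, the optimizer picks up a smaller step size automatically as training advances, which tends to reduce the late-training noise without slowing down the early steps.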