Search code examples
python image-processing tensorflow loss-function

feeding tensorflow with raw images dimension error


I am quite new to TensorFlow, so I tried to use the code from the tutorial to feed some layers with images of size (944,944) and classes yes/no (1,0) to see how it performs, but I haven't been able to make it work. The last error I am getting is: "Dimension size must be evenly divisible by 57032704 but is 3565440 for 'Reshape_1' with input shapes: [10,236,236,64], 2 and with input tensors computed as partial shapes: input1 = [?,57032704]".

I do not know whether the error comes from one of the reshape operations or from the fact that I can't feed the neurons like this. The code is the following:

import tensorflow as tf
import numpy as np
import os
# import cv2
from scipy import ndimage
import PIL

tf.logging.set_verbosity(tf.logging.INFO)

def define_model(features, labels, mode):
"""Model function for CNN."""
# Estimator model function: builds two conv + max-pool stages followed by a
# dense head, then returns the EstimatorSpec that matches `mode`.
#   features: dict whose "x" entry holds the image batch
#   labels:   class labels, passed to the sparse softmax cross-entropy loss
#             and to the accuracy metric
#   mode:     compared against tf.estimator.ModeKeys.PREDICT / TRAIN; any
#             other value falls through to the evaluation branch
# NOTE(review): as listed, the body lines below are not indented under the
# `def`; they must be indented for this to be valid Python.

# Input Layer: reshape to [batch, 944, 944, 1]; -1 lets the batch size be
# inferred and the trailing 1 is a single image channel.
input_layer = tf.reshape(features["x"], [-1,944, 944, 1])

# Convolutional Layer #1: 32 filters, 16x16 kernel, ReLU activation.
# padding="same" is used so the convolution keeps the spatial size.
conv1 = tf.layers.conv2d(
  inputs=input_layer,
  filters=32,
  kernel_size=[16, 16],
  padding="same",
  activation=tf.nn.relu)

# Pooling Layer #1: 2x2 max pooling with stride 2 (944 -> 472 per side).
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

# Convolutional Layer #2 (64 filters, 16x16 kernel) and Pooling Layer #2
# (2x2, stride 2: 472 -> 236 per side).
conv2 = tf.layers.conv2d(
    inputs=pool1,
    filters=64,
    kernel_size=[16, 16],
    padding="same",
    activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

# Dense Layer
# NOTE(review): this hard-codes 944*944*64 (= 57032704) features per example,
# but the reported error shows pool2 with shape [10,236,236,64], i.e.
# 236*236*64 features per example. This mismatch is what the 'Reshape_1'
# error complains about.
pool2_flat = tf.reshape(pool2, [-1,944*944*64])
dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
# Dropout (rate 0.4) is only switched on when mode is TRAIN.
dropout = tf.layers.dropout(
    inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)

# Logits Layer - raw predictions
# NOTE(review): 10 output units, although the question describes two classes
# (yes/no) - confirm the intended number of classes.
logits = tf.layers.dense(inputs=dropout, units=10)

predictions = {
    # Generate predictions (for PREDICT and EVAL mode)
    "classes": tf.argmax(input=logits, axis=1),
    # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
    # `logging_hook`.
    "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
}

# PREDICT mode: return the predictions only; no loss is built.
if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

# Calculate Loss (for both TRAIN and EVAL modes)
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

# Configure the Training Op (for TRAIN mode): plain gradient descent with a
# learning rate of 0.001, advancing the global step on each update.
if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    train_op = optimizer.minimize(
        loss=loss,
        global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

# Add evaluation metrics (for EVAL mode): accuracy of the argmax class
# against the labels.
eval_metric_ops = {
    "accuracy": tf.metrics.accuracy(
        labels=labels, predictions=predictions["classes"])}
return tf.estimator.EstimatorSpec(
    mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

# Script entry point: load the images, train the estimator built from
# define_model, then evaluate it and print the resulting metrics.
# NOTE(review): as listed, the body lines below are not indented under the
# `if`; they must be indented for this to be valid Python.
if __name__ == '__main__':
# Load training and eval data
# mnist = tf.contrib.learn.datasets.load_dataset("mnist")
# train_data = mnist.train.images  # Returns np.array
# train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
# NOTE(review): load_images is not defined anywhere in this listing; it is
# presumably a user helper returning (images, labels) arrays - confirm.
train_data, train_labels = load_images("C:\\Users\\Heads\\Desktop\\BDManchas_Semi")

# The evaluation set is a copy of the training set, so the metrics printed at
# the end are measured on the same data the model was trained on.
eval_data = train_data.copy()
eval_labels = train_labels.copy()

# Create the Estimator
classifier = tf.estimator.Estimator(
    model_fn=define_model, model_dir="/tmp/convnet_model")

# Set up logging for predictions: "softmax_tensor" is the name given to the
# softmax op inside define_model; it is logged every 50 iterations.
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
    tensors=tensors_to_log, every_n_iter=50)

# Train the model: shuffled batches of 10 with no epoch limit
# (num_epochs=None); the run length is set by steps=20000 below.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": train_data},
    y=train_labels,
    batch_size=10,
    num_epochs=None,
    shuffle=True)
classifier.train(
    input_fn=train_input_fn,
    steps=20000,
    hooks=[logging_hook])

# Evaluate the model and print results: a single unshuffled pass over the
# evaluation data.
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": eval_data},
    y=eval_labels,
    num_epochs=1,
    shuffle=False)
eval_results = classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)

---------------------------------MORE:----------------------------------

OK, so now that I have the reshape working, I am getting another error: the loss during training is NaN. I have been researching this (here there is a good answer), but every new function I try gives a different error. I have tried to change the loss from:

loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

to:

loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)

But there also seem to be problems with the reshape: the error says that logits and labels must have the same shape ((10,10) vs (10,)). I have tried to reshape logits and labels, but I always get a different error (I guess there is no way to make both arrays match).

The labels are defined as follows:

# Pseudo-code sketch of how the labels are built: one integer is appended
# per sample (presumably once per image, inside a loop that is not shown).
list_of_classes = []
# if the sample belongs to the positive class (class == 1):
list_of_classes.append(1)
# else (negative class):
list_of_classes.append(0)

# Convert the flat list of 0/1 values into a 1-D int32 array.
labels = np.array(list_of_classes).astype("int32") 

Any idea on how to use the proper loss?


Solution

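  • So the solution was to change the line (which hard-codes the original 944x944 size, although after the two 2x2 poolings the feature map is only 236x236x64):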

    pool2_flat = tf.reshape(pool2, [-1,944*944*64])
    

    to the line:

    pool2_flat = tf.layers.flatten(pool2)
    

    Also, I needed to use images resized to 512x512 instead of 944x944, because at the original size it did not fit in memory.