I am new to TensorFlow; I have looked through the tutorials and executed them successfully. Now I have to solve a problem where the output should NOT be categorical like the MNIST labels (digits 0-9). I want to count objects in an image, so I need a single numerical output value. Since the result can be anywhere in the range 0-300+, encoding it as a one-hot vector is not practical.
My code is below (most of it is copied from the MNIST tutorial). It works fine when I have multiple classes and the labels are encoded in a one-hot vector.
I think what I have to adjust is the cost function:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(prediction, y))
but I don't know how. Can anybody please help? The prediction should return one value, and y (the ground truth) is also one value, e.g. [5] for 5 objects.
import logging
import tensorflow as tf

### CNN CONFIG

n_classes = 1
batch_size = 100

x = tf.placeholder('float', [None, 16384])  # 128*128 = 16384 (MNIST: 28*28 = 784)
y = tf.placeholder('float')

keep_rate = 0.8
keep_prob = tf.placeholder(tf.float32)


def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def maxpool2d(x):
    # ksize = size of window, strides = movement of window
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


def convolutional_neural_network(x):
    weights = {'W_conv1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
               'W_conv2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
               'W_fc': tf.Variable(tf.random_normal([32 * 32 * 64, 1024])),
               'out': tf.Variable(tf.random_normal([1024, n_classes]))}

    biases = {'b_conv1': tf.Variable(tf.random_normal([32])),
              'b_conv2': tf.Variable(tf.random_normal([64])),
              'b_fc': tf.Variable(tf.random_normal([1024])),
              'out': tf.Variable(tf.random_normal([n_classes]))}

    x = tf.reshape(x, shape=[-1, 128, 128, 1])

    conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
    conv1 = maxpool2d(conv1)

    conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
    conv2 = maxpool2d(conv2)

    # fc = tf.reshape(conv2, [-1, 7 * 7 * 64])  # MNIST version (28x28 input)
    fc = tf.reshape(conv2, [-1, 32 * 32 * 64])  # 128x128 input after two 2x2 poolings
    fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])
    # fc = tf.nn.dropout(fc, keep_rate)

    output = tf.matmul(fc, weights['out']) + biases['out']
    return output


def train_neural_network(x):
    prediction = convolutional_neural_network(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(prediction, y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    # saver
    saver = tf.train.Saver()

    hm_epochs = 50
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())

        for epoch in range(hm_epochs):
            epoch_loss = 0
            logging.debug('Epoch: ' + str(epoch) + ' started')

            # train_database and getNextBatch come from my own data-loading code
            for i in range(int(len(train_database['images']) / batch_size)):
                epoch_x, epoch_y = getNextBatch(train_database, (i + 1) * batch_size)
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c

            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
The standard approach is to treat this as regression and use a mean squared error cost instead of the softmax cross-entropy. Note that with n_classes = 1 the softmax over a single logit is always 1, so the cross-entropy loss is identically zero and the network cannot learn anything. A squared-error cost treats the single output as a real-valued target:
cost = tf.reduce_mean(tf.square(prediction - y))
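For this to work the shapes have to line up: feed the counts as floats of shape [batch_size, 1] and give y a matching placeholder shape, while the output layer stays a plain linear layer with n_classes = 1 (no softmax). Below is a minimal sketch of only the pieces that change, in the same TF 1.x style as your code; the rounding and clipping at the end is just one reasonable way to turn the real-valued output back into an integer count, not a standard API.

# Assumes epoch_y is fed as a float array of shape [batch_size, 1], e.g. [[5.], [12.], ...]
y = tf.placeholder('float', [None, 1])        # one real-valued count per image

prediction = convolutional_neural_network(x)  # shape [batch_size, 1], raw linear output

# Mean squared error: penalizes the squared distance between predicted and true counts.
cost = tf.reduce_mean(tf.square(prediction - y))
optimizer = tf.train.AdamOptimizer().minimize(cost)

# At evaluation time, round the real-valued output to the nearest integer and clip
# at zero, since a negative object count is meaningless.
predicted_count = tf.maximum(tf.round(prediction), 0.)

With targets in the range 0-300+, it can also help to scale them (e.g. divide the counts by 100 before training and multiply the predictions back), since large raw targets produce large squared-error gradients early in training.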