I'm trying to build a TensorFlow model for analysing board games, so I started with a simpler 2D dataset. I generated 1000 images of black semicircles like these:
I thought it would be a good exercise to try and recover the angle of the flat side. I labeled these two example images as 210.474° and 147.593°.
Unfortunately, the results I get are terrible. All the predictions on the test data are roughly 180°, presumably close to the mean value of the labels.
Can anyone give me advice on how to improve my model architecture or otherwise improve my results? If all of the input data is boolean pixels, do I need to normalize it?
I create the model like this:
def build_and_compile_model():
    num_channels = 200
    kernel_size = 3
    image_height = 64
    image_width = 64
    regularizer = regularizers.l2(0.0001)
    model = keras.Sequential(
        [layers.Conv2D(num_channels,
                       kernel_size,
                       padding='same',
                       activation='relu',
                       input_shape=(image_height, image_width, 1),
                       activity_regularizer=regularizer),
         layers.Dense(64, activation='relu'),
         layers.Dense(64, activation='relu'),
         layers.Dense(1)])
    model.compile(loss='mean_absolute_error',
                  optimizer=tf.keras.optimizers.Adam(0.001))
    return model
When I try to fit the model, it improves for a few epochs, then stabilizes at a high error.
Here's the complete example:
import math
import shutil
import typing
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image, ImageDraw
import tensorflow as tf
from space_tracer import LivePillowImage
from tensorflow import keras
from tensorflow.python.keras import layers, regularizers


def build_and_compile_model():
    num_channels = 200
    kernel_size = 3
    image_height = 64
    image_width = 64
    regularizer = regularizers.l2(0.0001)
    model = keras.Sequential(
        [layers.Conv2D(num_channels,
                       kernel_size,
                       padding='same',
                       activation='relu',
                       input_shape=(image_height, image_width, 1),
                       activity_regularizer=regularizer),
         layers.Dense(64, activation='relu'),
         layers.Dense(64, activation='relu'),
         layers.Dense(1)])
    model.compile(loss='mean_absolute_error',
                  optimizer=tf.keras.optimizers.Adam(0.001))
    return model


def main():
    image_folder = Path(__file__).parent / 'circle_images'
    num_images = 1000
    image_data, label_data = read_input_data(num_images, image_folder)

    # Make NumPy printouts easier to read.
    np.set_printoptions(precision=3, suppress=True)

    image_count = image_data.shape[0]
    image_data = image_data.reshape(image_data.shape + (1, ))
    train_size = math.floor(image_count * 0.8)
    train_dataset = image_data[:train_size, :, :]
    test_dataset = image_data[train_size:, :, :]
    train_labels = label_data[:train_size]
    test_labels = label_data[train_size:]
    test_results = {}

    dnn_model = build_and_compile_model()
    print('training dataset:', train_dataset.shape)
    print('training labels:', train_labels.shape)

    start = datetime.now()
    history = dnn_model.fit(
        train_dataset,
        train_labels,
        validation_split=0.2,
        verbose=0, epochs=25)
    print('Trained for', datetime.now() - start)

    test_results['dnn_model'] = dnn_model.evaluate(test_dataset, test_labels, verbose=0)
    print(pd.DataFrame(test_results, index=['Mean absolute error [game value]']).T)
    test_predictions = dnn_model.predict(test_dataset).flatten()
    print(test_labels[:10])
    print(test_predictions[:10])
    plot_loss(history)


def create_images(num_images: int, image_folder: Path) -> None:
    print(f'Creating {num_images} images.')
    image_folder.mkdir()
    start_angles = np.random.random(num_images)
    start_angles *= 360
    rng = np.random.default_rng()
    rng.shuffle(start_angles)
    for i, start_angle in enumerate(start_angles):
        image_path = image_folder / f'image{i}.png'
        image = create_image(start_angle)
        image.save(image_path)
    label_text = '\n'.join(str(start_angle) for start_angle in start_angles)
    (image_folder / 'labels.csv').write_text(label_text)


def create_image(start_angle: float) -> Image.Image:
    image = Image.new('1', (64, 64))  # B&W 64x64
    drawing = ImageDraw.Draw(image)
    drawing.rectangle((0, 0, 64, 64), fill='white')
    drawing.pieslice(((0, 0), (63, 63)),
                     -start_angle,
                     -start_angle + 180,
                     fill='black')
    return image


def read_input_data(num_images: int,
                    image_folder: Path) -> typing.Tuple[np.ndarray, np.ndarray]:
    """ Read input data from the image folder.

    :returns: (images, labels)
    """
    labels = []
    if image_folder.exists():
        with (image_folder / 'labels.csv').open() as f:
            for line in f:
                labels.append(float(line))
    image_count = len(labels)
    if image_count != num_images:
        # Size has changed, so recreate the input data.
        shutil.rmtree(image_folder, ignore_errors=True)
        create_images(num_images, image_folder)
        return read_input_data(num_images, image_folder)
    label_data = np.array(labels)
    images = np.zeros((image_count, 64, 64))
    for i, image_path in enumerate(sorted(image_folder.glob('*.png'))):
        image = Image.open(image_path)
        bits = np.array(image)
        images[i, :, :] = bits
    return images, label_data


def plot_loss(history):
    plt.plot(history.history['loss'], label='loss')
    plt.plot(history.history['val_loss'], label='val_loss')
    plt.ylim(bottom=0)
    plt.xlabel('Epoch')
    plt.ylabel('Error [angle]')
    plt.legend()
    plt.grid(True)
    plt.show()


def demo():
    image = create_image(226.634)
    LivePillowImage(image).display()


if __name__ == '__main__':
    main()
elif __name__ == '__live_coding__':
    demo()
At the end, I see this output:
Trained for 0:00:09.155005
           Mean absolute error [game value]
dnn_model                          92.051697
7/7 [==============================] - 0s 4ms/step
[210.474 147.593 327.796 120.112 163.402 178.04 333.604 342.488 119.694
240.8 ]
[177.15 181.242 181.242 181.242 181.242 181.242 181.242 181.242 181.242
181.242]
You can see that all the predictions are close to 180°.
The problem lies in the way you process your data. In general, it is very unsafe to rely on file ordering for your ML model. Instead, store inputs and their corresponding labels together in one place, such as a single file or a database of some sort.
for i, image_path in enumerate(sorted(image_folder.glob('*.png'))):
    image = Image.open(image_path)
    bits = np.array(image)
    images[i, :, :] = bits
This specific loop is wrong because string ordering is not the same as numeric ordering. If you sort the file names, you get, for example,
image234.png < image3.png
because the sort is lexicographic.
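You can check this directly (a quick standalone demonstration, not part of the question's script):

names = ['image3.png', 'image234.png', 'image12.png']
print(sorted(names))
# ['image12.png', 'image234.png', 'image3.png']
# Strings compare character by character, so '1' < '2' < '3'
# decides the order before the full number is ever considered.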
Consequently, your entire dataset has shuffled labels, so your model can't learn anything except to predict the mean (which is exactly what you see). If you were to generate just 12 images, you would end up with something like:
Image1 -> Label1
Image10 -> Label2
Image11 -> Label3
Image12 -> Label4
Image2 -> Label5
Image3 -> Label6
Image4 -> Label7
Image5 -> Label8
Image6 -> Label9
Image7 -> Label10
Image8 -> Label11
Image9 -> Label12
One fix could be to change the loop above to

for i in range(len(label_data)):
    # Build the file name from the index so it matches the label order.
    image_path = image_folder / f"image{i}.png"
    image = Image.open(image_path)
    bits = np.array(image)
    images[i, :, :] = bits
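A more robust variant of the same idea, following the earlier advice to keep inputs and labels in one spot: write the file name next to each angle when you generate the data, then read the pairs back explicitly. A minimal sketch, assuming a hypothetical labels.csv with file_name,angle rows (not the one-angle-per-line format the question's script writes):

import csv
from pathlib import Path

import numpy as np
from PIL import Image


def read_labelled_images(image_folder: Path):
    # Each row pairs a file with its angle, so file ordering never matters.
    images, labels = [], []
    with (image_folder / 'labels.csv').open() as f:
        for file_name, angle in csv.reader(f):
            labels.append(float(angle))
            images.append(np.array(Image.open(image_folder / file_name)))
    return np.stack(images), np.array(labels)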
With the ordering fixed, you should be able to learn your mapping even with a tiny MLP; you don't even need a convolution for that.
model = keras.Sequential(
    [layers.Flatten(),
     layers.Dense(32),
     layers.Dense(1)])
and training for 300 epochs gives predictions that track the true angles instead of collapsing to the mean.
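For completeness, a minimal sketch of how that model could be compiled and trained. It borrows the loss and optimizer from the question and assumes train_dataset and train_labels are the arrays built in the question's main() after the loading fix:

from tensorflow import keras
from tensorflow.keras import layers

model = keras.Sequential(
    [layers.Flatten(),
     layers.Dense(32),
     layers.Dense(1)])
model.compile(loss='mean_absolute_error',
              optimizer=keras.optimizers.Adam(0.001))
# train_dataset / train_labels: shaped (N, 64, 64, 1) and (N,),
# with labels now matched to the right images.
history = model.fit(train_dataset, train_labels,
                    validation_split=0.2,
                    epochs=300, verbose=0)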