Tags: python, tensorflow, machine-learning, deep-learning, neural-network

Model structure for regression from images


I'm trying to build a tensorflow model for analysing board games, so I started with a simpler 2D dataset. I generated 1000 images of black semicircles like these:

[input image 1]  [input image 2]

I thought it would be a good exercise to try and recover the angle of the flat side. I labeled these two example images as 210.474° and 147.593°.

Unfortunately, the results I get are terrible. All the predictions on the test data are roughly 180°, presumably close to the mean value of the labels.

Can anyone give me advice on how to improve my model architecture or otherwise improve my results? If all of the input data is boolean pixels, do I need to normalize it?

I create the model like this:

def build_and_compile_model():
    num_channels = 200
    kernel_size = 3
    image_height = 64
    image_width = 64
    regularizer = regularizers.l2(0.0001)

    model = keras.Sequential(
        [layers.Conv2D(num_channels,
                       kernel_size,
                       padding='same',
                       activation='relu',
                       input_shape=(image_height, image_width, 1),
                       activity_regularizer=regularizer),
         layers.Dense(64, activation='relu'),
         layers.Dense(64, activation='relu'),
         layers.Dense(1)])

    model.compile(loss='mean_absolute_error',
                  optimizer=tf.keras.optimizers.Adam(0.001))
    return model

When I try to fit the model, it improves for a few epochs, then stabilizes at a high error.

[Plot of model training]

Here's the complete example:

import math
import shutil
import typing
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image, ImageDraw
import tensorflow as tf
from space_tracer import LivePillowImage
from tensorflow import keras
from tensorflow.keras import layers, regularizers


def build_and_compile_model():
    num_channels = 200
    kernel_size = 3
    image_height = 64
    image_width = 64
    regularizer = regularizers.l2(0.0001)

    model = keras.Sequential(
        [layers.Conv2D(num_channels,
                       kernel_size,
                       padding='same',
                       activation='relu',
                       input_shape=(image_height, image_width, 1),
                       activity_regularizer=regularizer),
         layers.Dense(64, activation='relu'),
         layers.Dense(64, activation='relu'),
         layers.Dense(1)])

    model.compile(loss='mean_absolute_error',
                  optimizer=tf.keras.optimizers.Adam(0.001))
    return model


def main():
    image_folder = Path(__file__).parent / 'circle_images'
    num_images = 1000
    image_data, label_data = read_input_data(num_images, image_folder)

    # Make NumPy printouts easier to read.
    np.set_printoptions(precision=3, suppress=True)

    image_count = image_data.shape[0]
    image_data = image_data.reshape(image_data.shape + (1, ))

    train_size = math.floor(image_count * 0.8)
    train_dataset = image_data[:train_size, :, :]
    test_dataset = image_data[train_size:, :, :]
    train_labels = label_data[:train_size]
    test_labels = label_data[train_size:]

    test_results = {}

    dnn_model = build_and_compile_model()

    print('training dataset:', train_dataset.shape)
    print('training labels:', train_labels.shape)

    start = datetime.now()
    history = dnn_model.fit(
        train_dataset,
        train_labels,
        validation_split=0.2,
        verbose=0, epochs=25)
    print('Trained for', datetime.now() - start)

    test_results['dnn_model'] = dnn_model.evaluate(test_dataset, test_labels, verbose=0)
    print(pd.DataFrame(test_results, index=['Mean absolute error [game value]']).T)

    test_predictions = dnn_model.predict(test_dataset).flatten()
    print(test_labels[:10])
    print(test_predictions[:10])

    plot_loss(history)


def create_images(num_images: int, image_folder: Path) -> None:
    print(f'Creating {num_images} images.')
    image_folder.mkdir()
    start_angles = np.random.random(num_images)
    start_angles *= 360
    rng = np.random.default_rng()
    rng.shuffle(start_angles)
    for i, start_angle in enumerate(start_angles):
        image_path = image_folder / f'image{i}.png'
        image = create_image(start_angle)
        image.save(image_path)
    label_text = '\n'.join(str(start_angle) for start_angle in start_angles)
    (image_folder / 'labels.csv').write_text(label_text)


def create_image(start_angle: float) -> Image.Image:
    image = Image.new('1', (64, 64))  # B&W 64x64
    drawing = ImageDraw.Draw(image)
    drawing.rectangle((0, 0, 64, 64), fill='white')
    drawing.pieslice(((0, 0), (63, 63)),
                     -start_angle,
                     -start_angle+180,
                     fill='black')
    return image


def read_input_data(num_images: int, image_folder: Path) -> typing.Tuple[
        np.ndarray,
        np.ndarray]:
    """ Read input data from the image folder.

    :returns: (images, labels)
    """
    labels = []
    if image_folder.exists():
        with (image_folder / 'labels.csv').open() as f:
            for line in f:
                labels.append(float(line))
    image_count = len(labels)
    if image_count != num_images:
        # Size has changed, so recreate the input data.
        shutil.rmtree(image_folder, ignore_errors=True)
        create_images(num_images, image_folder)
        return read_input_data(num_images, image_folder)
    label_data = np.array(labels)
    images = np.zeros((image_count, 64, 64))
    for i, image_path in enumerate(sorted(image_folder.glob('*.png'))):
        image = Image.open(image_path)
        bits = np.array(image)
        images[i, :, :] = bits
    return images, label_data


def plot_loss(history):
    plt.plot(history.history['loss'], label='loss')
    plt.plot(history.history['val_loss'], label='val_loss')
    plt.ylim(bottom=0)
    plt.xlabel('Epoch')
    plt.ylabel('Error [angle]')
    plt.legend()
    plt.grid(True)
    plt.show()


def demo():
    image = create_image(226.634)
    LivePillowImage(image).display()


if __name__ == '__main__':
    main()
elif __name__ == '__live_coding__':
    demo()

At the end, I see this output:

Trained for 0:00:09.155005
           Mean absolute error [game value]
dnn_model                         92.051697
7/7 [==============================] - 0s 4ms/step
[210.474 147.593 327.796 120.112 163.402 178.04  333.604 342.488 119.694
 240.8  ]
[177.15  181.242 181.242 181.242 181.242 181.242 181.242 181.242 181.242
 181.242]

You can see that all the predictions are close to 180°.


Solution

  • The problem lies in the way you process your data. In general, it is a very unsafe idea to rely on file ordering for your ML model. Instead, store the inputs and their corresponding labels together in one place, for example in a single index file or a database of some sort.
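
    As an illustration only (this is a sketch, not code from the original answer), you could keep the file names and labels in one table, reusing pandas and the num_images / start_angles / image_folder names from the question:

        import numpy as np
        import pandas as pd
        from PIL import Image

        # One row per sample: the file name and its label sit side by side,
        # so no later directory sort can separate them.
        index = pd.DataFrame({
            'filename': [f'image{i}.png' for i in range(num_images)],
            'angle': start_angles,  # labels, in the order the files were written
        })
        index.to_csv(image_folder / 'labels.csv', index=False)

        # When loading, iterate over the index file, never the directory listing.
        index = pd.read_csv(image_folder / 'labels.csv')
        images = np.stack([np.array(Image.open(image_folder / name))
                           for name in index['filename']])
        label_data = index['angle'].to_numpy()

    The loop the question currently uses, quoted below, is where the ordering assumption breaks: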

        for i, image_path in enumerate(sorted(image_folder.glob('*.png'))):
            image = Image.open(image_path)
            bits = np.array(image)
            images[i, :, :] = bits

    This specific loop is wrong, because string ordering is not the same as numeric ordering. If you sort the file names you will get, for example,

        image234.png < image3.png

    because the sort is lexicographic.
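
    You can check this quickly in plain Python (the file names here are just for illustration):

        >>> sorted(f'image{i}.png' for i in (1, 2, 3, 10, 234))
        ['image1.png', 'image10.png', 'image2.png', 'image234.png', 'image3.png']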

    Consequently, your entire dataset has shuffled labels, so your model can't learn anything except to predict the mean (which is exactly what you are seeing). If you were to generate just 12 images, you would end up with something like:

      Image1  -> Label1
      Image10 -> Label2
      Image11 -> Label3
      Image12 -> Label4
      Image2  -> Label5
      Image3  -> Label6
      Image4  -> Label7
      Image5  -> Label8
      Image6  -> Label9
      Image7  -> Label10
      Image8  -> Label11
      Image9  -> Label12
    

    One fix could be to change the loop above to

        for i in range(len(label_data)):
            # Build the file name from the loop index so that image i always
            # lines up with label i from labels.csv.
            image_path = image_folder / f"image{i}.png"
            image = Image.open(image_path)
            bits = np.array(image)
            images[i, :, :] = bits
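
    Another sketch, if you prefer to keep globbing the directory, is to sort the paths numerically on the index embedded in the file name (this assumes the imageN.png naming used in the question):

        image_paths = sorted(image_folder.glob('image*.png'),
                             key=lambda p: int(p.stem[len('image'):]))
        for i, image_path in enumerate(image_paths):
            images[i, :, :] = np.array(Image.open(image_path))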
    

    After fixing it, you should be able to learn your mapping even with a tiny MLP; you don't even need a convolution for that.

    
        model = keras.Sequential(
            [
                layers.Flatten(),
                layers.Dense(32),
                layers.Dense(1)])
    

    and training for 300 epochs gives:

    [training loss plot]
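
    For completeness, a sketch of how that small model could be compiled and trained, reusing the loss, optimizer settings, and data variables from the question (the explicit input_shape is only added here for clarity):

        model = keras.Sequential(
            [layers.Flatten(input_shape=(64, 64, 1)),
             layers.Dense(32),
             layers.Dense(1)])
        model.compile(loss='mean_absolute_error',
                      optimizer=tf.keras.optimizers.Adam(0.001))
        history = model.fit(train_dataset, train_labels,
                            validation_split=0.2, epochs=300, verbose=0)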