I have programmed a GAN model using keras but the training didn't go well. The generator model always returns a bare noise image (28x28 size) instead of something similar to mnist dataset. This doesn't give me any error though, when it comes to training discriminator model will become trainable=False
, which is not what I want to do.
If this implementation is bad, please let me know. Can anyone help?
import os
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, BatchNormalization
from keras.optimizers import SGD, Adam, RMSprop
from keras.datasets import mnist
from keras.regularizers import l1_l2
def plot_generated(noise, Generator):
image_fake = Generator.predict(noise)
plt.figure(figsize=(10,8))
plt.show()
plt.close()
def plot_metircs(metrics, epoch=None):
plt.figure(figsize=(10,8))
plt.plot(metrics['d'], label='discriminative loss', color='b')
plt.legend()
plt.show()
plt.close()
plt.figure(figsize=(10,8))
plt.plot(metrics['g'], label='generative loss', color='r')
plt.legend()
plt.show()
plt.close()
def Generator():
model = Sequential()
LeakyReLU = keras.layers.advanced_activations.LeakyReLU(alpha=0.2)
model.add(Dense(input_dim=100, units=128, activation=LeakyReLU, name='g_input'))
model.add(Dense(input_dim=128, units=784, activation='tanh', name='g_output'))
return model
def Discriminator():
model = Sequential()
LeakyReLU = keras.layers.advanced_activations.LeakyReLU(alpha=0.2)
model.add(Dense(input_dim=784, units=128, activation=LeakyReLU, name='d_input'))
model.add(Dense(input_dim=128, units=1, activation='sigmoid', name='d_output'))
model.compile(loss='binary_crossentropy', optimizer='Adam')
return model
def Generative_Adversarial_Network(Generator, Discriminator):
model = Sequential()
model.add(Generator)
model.add(Discriminator)
# train only generator in the entire GAN architecture
Discriminator.trainable = False
model.compile(loss='binary_crossentropy', optimizer='Adam')
return model
def Training(z_input_size, Generator, Discriminator, GAN, loss_dict, X_train, epoch, batch, smooth):
for e in range(epoch):
# z: noise, used for input of G to generate fake image based on this noise! it's like a seed
noise = np.random.uniform(-1, 1, size=[batch, z_input_size])
image_fake = Generator.predict_on_batch(noise)
# sampled real_image from dataset
rand_train_index = np.random.randint(0, X_train.shape[0], size=batch)
image_real = X_train[rand_train_index, :]
# concatenate real and fake images
"""
X = [
image_real => label : 1 (we can multiply a smoothing factor)
image_fake => label : 0
]
"""
X = np.vstack((image_real, image_fake))
y = np.zeros(len(X))
# putting label "1" to image_real
y[len(image_real):] = 1*(1 - smooth)
y = y.astype(int)
# train only discriminator
d_loss = Discriminator.train_on_batch(x=X, y=y)
# NOTE: remember?? we set discriminator OFF during the training of GAN!
# So, we can safely train only generator, weight of discriminator set fixed!
g_loss = GAN.train_on_batch(x=noise, y=y[len(noise):])
loss_dict['d'].append(d_loss)
loss_dict['g'].append(g_loss)
if e%1000 == 0:
plt.imshow(image_fake)
plt.show()
plot_generated(noise, Generator)
plot_metircs(loss_dict)
return "done!"
Gen = Generator()
Dis = Discriminator()
GAN = Generative_Adversarial_Network(Gen, Dis)
GAN.summary()
Gen.summary()
Dis.summary()
gan_losses = {"d":[], "g":[], "f":[]}
epoch = 30000
batch = 1000
smooth = 0.9
z_input_size = 100
row, col = 28, 28
z_group_matrix = np.random.uniform(0, 1, examples*z_input_size)
z_group_matrix = z_group_matrix.reshape([9, z_input_size])
print(z_group_matrix.shape)
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train, X_test = X_train.reshape(X_train.shape[0], row*col), X_test.reshape(X_test.shape[0], row*col)
X_train.astype('float32')
X_test.astype('float32')
X_train, X_test = X_train/255, X_test/255
print('X_train shape: ', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
Training(z_input_size, Gen, Dis, GAN, loss_dict=gan_losses, X_train=X_train, epoch=epoch, batch=batch, smooth=smooth)
The model itself is correct.
I would suggest a few minor changes: