Search code examples
Tags: python, tensorflow, keras, deep-learning

Change Conv2DTranspose output from (None, 39, 39, 1) to (None, 40, 40, 1)


I am implementing a Decoder (a type of Artificial Neural Network) using keras:

latent_dim = 25  # length of the latent vector fed to the decoder
latent_inputs = keras.Input(shape=(latent_dim,))

# Fully connected stem: 25 -> 100 -> 1024 -> 4096, then fold the 4096 values
# into a 4x4 feature map with 256 channels (4 * 4 * 256 = 4096).
x = latent_inputs
for _units in (100, 1024, 4096):
    x = layers.Dense(units=_units, activation="relu")(x)
x = layers.Reshape((4, 4, 256))(x)

# Upsampling stack. Every layer uses a 3x3 kernel with padding="same", so a
# stride-2 layer doubles height/width and a stride-1 layer keeps them:
# 4 -> 8 -> 8 -> 16 -> 16 -> 32.
for _filters, _strides in ((256, 2), (128, 1), (128, 2), (64, 1), (64, 2)):
    x = layers.Conv2DTranspose(
        filters=_filters,
        kernel_size=3,
        activation="relu",
        strides=_strides,
        padding="same",
    )(x)

# Single-channel sigmoid output; strides are left at their default, so the
# 32x32 spatial size is unchanged.
decoder_outputs = layers.Conv2DTranspose(
    filters=1, kernel_size=3, activation="sigmoid", padding="same"
)(x)

decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")

decoder.summary()

whose output is:

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 25)]              0         
                                                                 
 dense (Dense)               (None, 100)               2600      
                                                                 
 dense_1 (Dense)             (None, 1024)              103424    
                                                                 
 dense_2 (Dense)             (None, 4096)              4198400   
                                                                 
 reshape (Reshape)           (None, 4, 4, 256)         0         
                                                                 
 conv2d_transpose (Conv2DTr  (None, 8, 8, 256)         590080    
 anspose)                                                        
                                                                 
 conv2d_transpose_1 (Conv2D  (None, 8, 8, 128)         295040    
 Transpose)                                                      
                                                                 
 conv2d_transpose_2 (Conv2D  (None, 16, 16, 128)       147584    
 Transpose)                                                      
                                                                 
 conv2d_transpose_3 (Conv2D  (None, 16, 16, 64)        73792     
 Transpose)                                                      
                                                                 
 conv2d_transpose_4 (Conv2D  (None, 32, 32, 64)        36928     
 Transpose)                                                      
                                                                 
 conv2d_transpose_5 (Conv2D  (None, 32, 32, 1)         577       
 Transpose)                                                      
                                                                 

I want to adjust my model so that decoder_outputs shape is (None, 40, 40, 1) instead of (None, 32, 32, 1). This is what I tried to do:

latent_dim = 25
latent_inputs = keras.Input(shape=(latent_dim,))

# Attempt: have the dense stem emit 40 * 40 * 1 = 1600 values and reshape
# them directly to the target resolution before the transposed convolutions.
x = layers.Dense(units=100, activation="relu")(latent_inputs)
x = layers.Dense(units=1024, activation="relu")(x)
x = layers.Dense(units=1600, activation="relu")(x)
x = layers.Reshape((40, 40, 1))(x)

# Settings shared by the hidden transposed convolutions below.
_relu_same = dict(kernel_size=3, activation="relu", padding="same")

# The two stride-2 layers each double height/width (40 -> 80 -> 160), so the
# model ends at 160x160 rather than staying at 40x40.
x = layers.Conv2DTranspose(filters=128, strides=2, **_relu_same)(x)
x = layers.Conv2DTranspose(filters=64, strides=1, **_relu_same)(x)
x = layers.Conv2DTranspose(filters=64, strides=2, **_relu_same)(x)
decoder_outputs = layers.Conv2DTranspose(
    filters=1, kernel_size=3, activation="sigmoid", padding="same"
)(x)

decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")

decoder.summary()

but unfortunately decoder_outputs shape is (None, 160, 160, 1).

Can you help me, please?

EDIT

I tried the following solution:

latent_dim = 25
latent_inputs = keras.Input(shape=(latent_dim,))

# Dense stem ends in 4096 units, reshaped to a 4x4 map with 256 channels.
x = layers.Dense(units=100, activation="relu")(latent_inputs)
x = layers.Dense(units=1024, activation="relu")(x)
x = layers.Dense(units=4096, activation="relu")(x)
x = layers.Reshape((4, 4, 256))(x)
# padding="same": output size = input size * stride, so 4 -> 8 -> 8 -> 16.
x = layers.Conv2DTranspose(filters=256, kernel_size=3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(filters=128, kernel_size=3, activation="relu", strides=1, padding="same")(x)
# This layer has to be padding="same" to give the 8 -> 16 step shown in the
# summary; with padding="valid" it would produce 17x17 and the model would
# end at 41x41 instead of 39x39.
x = layers.Conv2DTranspose(filters=128, kernel_size=3, activation="relu", strides=2, padding="same")(x)
# padding="valid" with a 3x3 kernel: stride 1 adds 2 (16 -> 18) and
# stride 2 gives 2 * n + 1 (18 -> 37).
x = layers.Conv2DTranspose(filters=64, kernel_size=3, activation="relu", strides=1, padding="valid")(x)
x = layers.Conv2DTranspose(filters=64, kernel_size=3, activation="relu", strides=2, padding="valid")(x)
# Default strides with padding="valid": 37 -> 39, one short of the 40 wanted.
decoder_outputs = layers.Conv2DTranspose(filters=1, kernel_size=3, activation="sigmoid", padding="valid")(x)

decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")

decoder.summary()

that is, using padding="valid" for some layers, but this is the output I get:

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 25)]              0         
                                                                 
 dense (Dense)               (None, 100)               2600      
                                                                 
 dense_1 (Dense)             (None, 1024)              103424    
                                                                 
 dense_2 (Dense)             (None, 4096)              4198400   
                                                                 
 reshape (Reshape)           (None, 4, 4, 256)         0         
                                                                 
 conv2d_transpose (Conv2DTr  (None, 8, 8, 256)         590080    
 anspose)                                                        
                                                                 
 conv2d_transpose_1 (Conv2D  (None, 8, 8, 128)         295040    
 Transpose)                                                      
                                                                 
 conv2d_transpose_2 (Conv2D  (None, 16, 16, 128)       147584    
 Transpose)                                                      
                                                                 
 conv2d_transpose_3 (Conv2D  (None, 18, 18, 64)        73792     
 Transpose)                                                      
                                                                 
 conv2d_transpose_4 (Conv2D  (None, 37, 37, 64)        36928     
 Transpose)                                                      
                                                                 
 conv2d_transpose_5 (Conv2D  (None, 39, 39, 1)         577       
 Transpose)

As you can see, the decoder_outputs shape is now (None, 39, 39, 1). I want it to be (None, 40, 40, 1). How can I fix this?


Solution

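  • The reason I asked for the model input was so that I could do the calculations needed to produce your desired output. If you want to learn how to calculate the output shape of your CNN layers, here is a thread to help you out. In short, with padding="same" a Conv2DTranspose layer's output height/width is the input size multiplied by the stride, so an 8x8 feature map with strides=5 becomes 40x40. Here is the code: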

    latent_dim = 25
    latent_inputs = keras.Input(shape=(latent_dim,))
    
    # Dense stem ends in 4096 units, reshaped to a 4x4 map with 256 channels.
    x = layers.Dense(units=100, activation="relu")(latent_inputs)
    x = layers.Dense(units=1024, activation="relu")(x)
    x = layers.Dense(units=4096, activation="relu")(x)
    x = layers.Reshape((4, 4, 256))(x)
    # All transposed convolutions use padding="same"; the first two take the
    # feature map from 4x4 to 8x8 (stride 2) and keep it at 8x8 (stride 1).
    x = layers.Conv2DTranspose(filters=256, kernel_size=3, activation="relu", strides=2, padding="same")(x)
    x = layers.Conv2DTranspose(filters=128, kernel_size=3, activation="relu", strides=1, padding="same")(x)
    # strides=5 jumps from 8x8 straight to the 40x40 target (8 * 5 = 40).
    # NOTE(review): the 3x3 kernel is smaller than the stride of 5, so
    # neighbouring kernel placements do not overlap -- presumably worth
    # checking the generated images for grid-like artifacts.
    x = layers.Conv2DTranspose(filters=128, kernel_size=3, activation="relu", strides=5, padding="same")(x)
    # The remaining layers use stride 1 so the 40x40 size is preserved.
    x = layers.Conv2DTranspose(filters=64, kernel_size=3, activation="relu", strides=1, padding="same")(x)
    x = layers.Conv2DTranspose(filters=64, kernel_size=3, activation="relu", strides=1, padding="same")(x)
    # Single-channel sigmoid output at 40x40.
    decoder_outputs = layers.Conv2DTranspose(filters=1, kernel_size=3, activation="sigmoid", padding="same")(x)
    
    decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
    
    decoder.summary()
    

    Here is the output:

    Model: "decoder"
    _________________________________________________________________
     Layer (type)                Output Shape              Param #   
    =================================================================
     input_46 (InputLayer)       [(None, 25)]              0         
                                                                     
     dense_138 (Dense)           (None, 100)               2600      
                                                                     
     dense_139 (Dense)           (None, 1024)              103424    
                                                                     
     dense_140 (Dense)           (None, 4096)              4198400   
                                                                     
     reshape_46 (Reshape)        (None, 4, 4, 256)         0         
                                                                     
     conv2d_transpose_265 (Conv2  (None, 8, 8, 256)        590080    
     DTranspose)                                                     
                                                                     
     conv2d_transpose_266 (Conv2  (None, 8, 8, 128)        295040    
     DTranspose)                                                     
                                                                     
     conv2d_transpose_267 (Conv2  (None, 40, 40, 128)      147584    
     DTranspose)                                                     
                                                                     
     conv2d_transpose_268 (Conv2  (None, 40, 40, 64)       73792     
     DTranspose)                                                     
                                                                     
     conv2d_transpose_269 (Conv2  (None, 40, 40, 64)       36928     
     DTranspose)                                                     
                                                                     
     conv2d_transpose_270 (Conv2  (None, 40, 40, 1)        577       
     DTranspose)                                                     
                                                                     
    =================================================================
    Total params: 5,448,425
    Trainable params: 5,448,425
    Non-trainable params: 0
    _________________________________________________________________