Search code examples
python deep-learning neural-network jupyter-notebook autoencoder

ValueError: Input 0 of layer sequential_7 incompatible with the layer: expected axis -1 of input shape to have value 5 but received shape (None, 21)


**I'm trying to use autoencoders on the NASA-provided data for software defect prediction; the data I am importing is JM1.csv.** But I think the decoder part (the decoder layer) isn't working: I want to display all 21 columns from the CSV file, not the 5 reduced columns.

import pandas as pd
import tensorflow as tf
# import kerastuner.tuners as kt
import matplotlib.pyplot as plt
# from tensorflow.keras import Model
# from tensorflow.keras import Sequential
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
# from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
# from tensorflow.keras.losses import MeanSquaredLogarithmicError


# NOTE: both paths name the same CSV, so the "test" frame is a second copy of
# the training data rather than a held-out split.
TRAIN_DATA_PATH = 'NASA/JM1.csv'
TEST_DATA_PATH = 'NASA/JM1.csv'
TARGET_NAME = 'HALSTEAD_LEVEL'

train_data = pd.read_csv(TRAIN_DATA_PATH)
test_data = pd.read_csv(TEST_DATA_PATH)

# Separate the target column from the remaining feature columns.
x_train = train_data.drop(columns=TARGET_NAME)
y_train = train_data[TARGET_NAME]
x_test = test_data.drop(columns=TARGET_NAME)
y_test = test_data[TARGET_NAME]

from sklearn.preprocessing import MinMaxScaler

def scale_datasets(x_train, x_test):
    """
    Min-max scale the train and test feature frames.

    The scaler is fitted on ``x_train`` only and then applied to both
    frames, so ``x_test`` is transformed with the training data's
    per-column minimum and maximum.

    Args:
        x_train: DataFrame of training features; used to fit the scaler.
        x_test: DataFrame of test features, transformed with the scaler
            fitted on ``x_train``.

    Returns:
        Tuple ``(x_train_scaled, x_test_scaled)`` of DataFrames that keep
        the column labels and the row index of their respective inputs.
    """
    scaler = MinMaxScaler()
    # The scaler returns bare arrays; re-attach both the column labels and
    # the row index so the scaled frames stay aligned with the originals
    # (and with any target Series that shares their index).
    x_train_scaled = pd.DataFrame(
        scaler.fit_transform(x_train),
        columns=x_train.columns,
        index=x_train.index,
    )
    x_test_scaled = pd.DataFrame(
        scaler.transform(x_test),
        columns=x_test.columns,
        index=x_test.index,
    )
    return x_train_scaled, x_test_scaled
  
# Scale both feature frames using the scaler fitted on the training features.
x_train_scaled, x_test_scaled = scale_datasets(x_train, x_test)

class AutoEncoders(tf.keras.Model):
    """Dense autoencoder: a 21-10-5 encoder followed by a 5-10-21 decoder.

    Every hidden layer uses a ReLU activation. The decoder ends with one
    extra sigmoid layer of ``output_units`` units, which produces the
    reconstruction.

    Args:
        output_units: Number of units in the final (sigmoid) decoder layer.
    """

    def __init__(self, output_units):
        super().__init__()
        dense = tf.keras.layers.Dense

        # Encoder: narrows the representation down to 5 units.
        self.encoder = tf.keras.Sequential(
            [dense(width, activation="relu") for width in (21, 10, 5)]
        )

        # Decoder: mirrors the encoder, then maps to the output width.
        decoder_layers = [
            dense(width, activation="relu") for width in (5, 10, 21)
        ]
        decoder_layers.append(dense(output_units, activation="sigmoid"))
        self.decoder = tf.keras.Sequential(decoder_layers)

    def call(self, inputs):
        """Encode ``inputs`` and return the decoder's reconstruction."""
        return self.decoder(self.encoder(inputs))

# One reconstruction unit per scaled feature column.
auto_encoder = AutoEncoders(x_train_scaled.shape[1])

auto_encoder.compile(optimizer='adam', loss='mae', metrics=['mae'])

# Autoencoder training: the inputs double as the targets, for both the
# training data and the validation data.
history = auto_encoder.fit(
    x=x_train_scaled,
    y=x_train_scaled,
    batch_size=32,
    epochs=15,
    validation_data=(x_test_scaled, x_test_scaled),
)

# Reach the sub-models through their attributes instead of get_layer() with
# an auto-generated name such as 'sequential_7'. Those names are numbered
# per session, so they change whenever the model is rebuilt; here the name
# resolved to the decoder, which expects 5-wide input and therefore rejected
# the 21-column frame with a ValueError.
encoder_layer = auto_encoder.encoder

# Encoded (bottleneck) representation: 5 columns, feature_0 .. feature_4.
reduced_df = pd.DataFrame(encoder_layer.predict(x_train_scaled))
reduced_df = reduced_df.add_prefix('feature_')

# Full reconstruction (encoder followed by decoder): one column per input
# feature, labelled with the original column names.
decoded_df = pd.DataFrame(
    auto_encoder.predict(x_train_scaled),
    columns=x_train_scaled.columns,
)

*Here it shows the two layers, sequential_6 and sequential_7. If I use sequential_6, it gives me an output of only 5 features, but I want all of the features, i.e. the 21 decoded features.*
# Render the frame in the notebook output.
# NOTE(review): display is assumed to be the IPython/Jupyter built-in — confirm.
display(reduced_df)
# Debug aid (disabled): print the name of each layer held by the model.
# for layer in auto_encoder.layers:
#     print(layer.name)

My assumption is that it shows only the encoded part (5 features), not the decoded part (21 features).


Solution

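  • Just restart your Jupyter kernel and clear all output, replace 'sequential_7' with 'sequential', and then display reduced_df. It will work fine. (Keras numbers auto-generated layer names cumulatively within a session, so after a restart the encoder is named 'sequential' and the decoder 'sequential_1'.)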