**I'm trying to use autoencoders on the NASA-provided data for software defect prediction; the data I am importing is JM1.csv.** But I think the decoder part (the decoder layer) isn't working: I want to display all 21 columns of the CSV file, not the 5 reduced columns.
import pandas as pd
import tensorflow as tf
# import kerastuner.tuners as kt
import matplotlib.pyplot as plt
# from tensorflow.keras import Model
# from tensorflow.keras import Sequential
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
# from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
# from tensorflow.keras.losses import MeanSquaredLogarithmicError
# Train and test sets are read from the same CSV file; the target column is
# split off so the remaining columns can be fed to the autoencoder.
TRAIN_DATA_PATH = 'NASA/JM1.csv'
TEST_DATA_PATH = 'NASA/JM1.csv'
TARGET_NAME = 'HALSTEAD_LEVEL'

train_data = pd.read_csv(TRAIN_DATA_PATH)
test_data = pd.read_csv(TEST_DATA_PATH)

# Features are every column except the target; the target is kept separately.
x_train = train_data.drop(columns=TARGET_NAME)
y_train = train_data[TARGET_NAME]
x_test = test_data.drop(columns=TARGET_NAME)
y_test = test_data[TARGET_NAME]
from sklearn.preprocessing import MinMaxScaler
def scale_datasets(x_train, x_test):
    """Min-max scale the train and test feature frames.

    The scaler is fitted on ``x_train`` only and the fitted transform is then
    applied to ``x_test``, so both frames share the same scaling.

    Args:
        x_train: DataFrame of training features used to fit the scaler.
        x_test: DataFrame of test features transformed with the fitted scaler.

    Returns:
        A tuple ``(x_train_scaled, x_test_scaled)`` of DataFrames that keep
        the column labels and the row index of the corresponding input.
    """
    min_max_scaler = MinMaxScaler()
    # The scaler returns a bare array; restore both the column labels and the
    # row index so the scaled frames stay aligned with their targets.
    x_train_scaled = pd.DataFrame(
        min_max_scaler.fit_transform(x_train),
        columns=x_train.columns,
        index=x_train.index,
    )
    x_test_scaled = pd.DataFrame(
        min_max_scaler.transform(x_test),
        columns=x_test.columns,
        index=x_test.index,
    )
    return x_train_scaled, x_test_scaled
# Scale the feature frames; the scaler is fitted on the training features.
x_train_scaled, x_test_scaled = scale_datasets(x_train, x_test)
class AutoEncoders(tf.keras.Model):
    """Dense autoencoder made of an encoder stack and a decoder stack.

    The encoder narrows its input through Dense layers of 21, 10 and 5 units.
    The decoder widens it again through 5, 10 and 21 units and ends in a
    sigmoid layer of ``output_units`` units.
    """

    def __init__(self, output_units):
        """Build the encoder and decoder sub-models.

        Args:
            output_units: Number of units in the final sigmoid layer of the
                decoder, i.e. the width of the reconstruction.
        """
        super().__init__()
        dense = tf.keras.layers.Dense

        # Encoder: 21 -> 10 -> 5, all ReLU.
        encoder_stack = [dense(units, activation="relu") for units in (21, 10, 5)]
        self.encoder = tf.keras.Sequential(encoder_stack)

        # Decoder: 5 -> 10 -> 21 (ReLU), then the sigmoid output layer.
        decoder_stack = [dense(units, activation="relu") for units in (5, 10, 21)]
        decoder_stack.append(dense(output_units, activation="sigmoid"))
        self.decoder = tf.keras.Sequential(decoder_stack)

    def call(self, inputs):
        """Encode ``inputs`` and return the decoder's reconstruction."""
        return self.decoder(self.encoder(inputs))
# The output layer gets one unit per scaled feature column, so the
# reconstruction has the same width as the input.
auto_encoder = AutoEncoders(x_train_scaled.shape[1])

# Mean absolute error is used both as the loss and as the reported metric.
auto_encoder.compile(optimizer='adam', loss='mae', metrics=['mae'])

# The inputs double as the targets: the model learns to reproduce its input.
history = auto_encoder.fit(
    x=x_train_scaled,
    y=x_train_scaled,
    batch_size=32,
    epochs=15,
    validation_data=(x_test_scaled, x_test_scaled),
)
# Reach the sub-models through the attributes defined on AutoEncoders instead
# of get_layer('sequential_7'): auto-generated names such as 'sequential_7'
# change between sessions, so a lookup by that name is not reproducible.
encoder_layer = auto_encoder.encoder

# Encoded (reduced) representation: one column per unit of the last encoder
# layer.
reduced_df = pd.DataFrame(encoder_layer.predict(x_train_scaled))
reduced_df = reduced_df.add_prefix('feature_')

# Full reconstruction: running the whole model (encoder followed by decoder)
# yields one output column per input feature. The decoder on its own cannot be
# fed the scaled inputs directly because it is built on the encoder's output.
decoded_df = pd.DataFrame(
    auto_encoder.predict(x_train_scaled),
    columns=x_train_scaled.columns,
)
*Here it shows the two layers, sequential_6 and sequential_7. If I use sequential_6, it gives me the output of only 5 features, but I want all the features, i.e. I want to show the 21 decoded features.*
# Render the frame built above (presumably the IPython/Jupyter `display`
# helper; it is not imported in this file).
display(reduced_df)
# Uncomment the loop below to print the names of the model's sub-layers:
# for layer in auto_encoder.layers:
# print(layer.name)
What I assume is that it shows only the encoded part (5 features), but not the decoded part (21 features).
Just restart your Jupyter kernel and clear all output. Replace 'sequential_7' with 'sequential', and then display reduced_df. It will work fine.