
Keras K-fold cross-validation has higher MSE


I'm trying to get an accurate MSE on my dataset with the following code:
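For reference, these are the imports the snippet needs, plus a minimal sketch of the custom r_squared metric passed to compile() (its exact definition isn't shown here, so treat this implementation as an assumption):

import numpy as np
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import backend as K

# One common way to define the custom r_squared metric used in compile()
def r_squared(y_true, y_pred):
    ss_res = K.sum(K.square(y_true - y_pred))          # residual sum of squares
    ss_tot = K.sum(K.square(y_true - K.mean(y_true)))  # total sum of squares
    return 1 - ss_res / (ss_tot + K.epsilon())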

# Selecting inputs and outputs
inputs = data[input_columns].select_dtypes(include=[np.number])
outputs = data[output_columns].select_dtypes(include=[np.number])

# Initialize the scaler for inputs and outputs
scaler = MinMaxScaler()
output_scaler = MinMaxScaler()

# KFold
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Metrics
test_losses, test_maes, test_mses, test_mapes, test_r2 = [], [], [], [], []

# Store metrics history for plots
history_list = []

for train, test in kf.split(inputs, outputs):
    # Splitting data
    X_train = inputs.iloc[train]
    X_test = inputs.iloc[test]
    y_train = outputs.iloc[train]
    y_test = outputs.iloc[test]

    # Normalizing inputs
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Normalizing outputs
    y_train_scaled = output_scaler.fit_transform(y_train)
    y_test_scaled = output_scaler.transform(y_test)

    # Model definition
    model = Sequential([
        Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
        Dense(64, activation='relu'),
        Dense(y_train_scaled.shape[1])  # Match number of outputs
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='mse', metrics=['mae', 'mean_squared_error', 'mean_absolute_percentage_error', r_squared])

    # Train the model
    history = model.fit(X_train_scaled, y_train_scaled, epochs=500, batch_size=32, verbose=0, validation_split=0.2)
    history_list.append(history)

    # Evaluation
    # evaluate() returns values in compile() order: loss, mae, mse, mape, r2
    test_loss, test_mae, test_mse, test_mape, test_r2_data = model.evaluate(X_test_scaled, y_test_scaled, verbose=0)
    print(test_loss)
    test_losses.append(test_loss)
    test_maes.append(test_mae)
    test_mses.append(test_mse)
    test_mapes.append(test_mape)
    test_r2.append(test_r2_data)

# Calculate average metrics
avg_test_loss = np.mean(test_losses)
avg_test_mae = np.mean(test_maes)
avg_test_mse = np.mean(test_mses)
avg_test_mape = np.mean(test_mapes)
avg_test_r2 = np.mean(test_r2)

But when I run this, my average MSE is 0.030, while without K-fold it got as low as 0.014. Now not even one fold has an MSE lower than 0.020.

Is this a characteristic of K-fold cross-validation in Keras, or am I doing something wrong?

Thanks in advance!

Update: here is the code when I don't use K-fold (I tried to make it an almost exact copy):

# Selecting inputs and outputs
inputs = data[input_columns].select_dtypes(include=[np.number])
outputs = data[output_columns].select_dtypes(include=[np.number])

# Initialize the scaler for inputs and outputs
scaler = MinMaxScaler()
output_scaler = MinMaxScaler()


if True:  # dummy block, kept only so the indentation matches the K-fold version

    # Splitting data
    X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=0.2, random_state=42)

    # Normalizing inputs
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Normalizing outputs
    y_train_scaled = output_scaler.fit_transform(y_train)
    y_test_scaled = output_scaler.transform(y_test)

    # Model definition
    model = Sequential([
        Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
        Dense(64, activation='relu'),
        Dense(y_train_scaled.shape[1])  # Match number of outputs
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='mse', metrics=['mae', 'mean_squared_error', 'mean_absolute_percentage_error', r_squared])

    # Train the model
    history = model.fit(X_train_scaled, y_train_scaled, epochs=500, batch_size=32, verbose=0, validation_split=0.2)

    # Evaluation
    # evaluate() returns values in compile() order: loss, mae, mse, mape, r2
    test_loss, test_mae, test_mse, test_mape, test_r2_data = model.evaluate(X_test_scaled, y_test_scaled, verbose=0)
    print(test_loss)

Solution

  • train_test_split (which comes from scikit-learn, not Keras) shuffles the rows, while KFold doesn't.

    Adding this line makes the performance of the two approaches comparable (see the sketch below for where it fits):

    data = shuffle(data)
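
    A minimal sketch of how this fix slots into the K-fold version, assuming shuffle is sklearn.utils.shuffle and data is the original DataFrame (the random_state is added here only for reproducibility):

    from sklearn.utils import shuffle

    # Shuffle the rows once, up front, so the folds see the data in a random order
    data = shuffle(data, random_state=42)

    # Selecting inputs and outputs (unchanged from the question)
    inputs = data[input_columns].select_dtypes(include=[np.number])
    outputs = data[output_columns].select_dtypes(include=[np.number])

    # ... the rest of the K-fold loop stays the same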