I've amended the code found here. But i'm getting a dimension error in my in input, like below:
ValueError: Error when checking input: expected InputLayer to have 4 dimensions, but got array with shape (None, None)
This is my modified code (i'm running this on Colab):
#Power data classification/regression with CNN
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import csv as csv
import keras.backend as K
from sklearn.preprocessing import MinMaxScaler # For normalizing data
print("TensorFlow version:",tf.__version__)
!wget https://raw.githubusercontent.com/sibyjackgrove/CNN-on-Wind-Power-Data/master/MISO_power_data_classification_labels.csv
!wget https://raw.githubusercontent.com/sibyjackgrove/CNN-on-Wind-Power-Data/master/MISO_power_data_input.csv
#Read total rows in csv file without loading into memory
def data_set_size(csv_file):
with open(csv_file) as csvfile:
csv_rows = 0
for _ in csvfile:
csv_rows += 1
return csv_rows-1 #Remove header from count and return
csv_file = "./MISO_power_data_classification_labels.csv"
n_train = data_set_size(csv_file)
print("Training data set size:",n_train)
#Python generator to supply batches of traning data during training with loading full data set to memory
def power_data_generator(batch_size,gen_type=''):
valid_size = max(1,np.int(0.2*batch_size))
while 1:
df_input=pd.read_csv('./MISO_power_data_input.csv',usecols =['Wind_MWh','Actual_Load_MWh'],chunksize =24*(batch_size+valid_size), iterator=True)
df_target=pd.read_csv('./MISO_power_data_classification_labels.csv',usecols =['Mean Wind Power','Standard Deviation','WindShare'],chunksize =batch_size+valid_size, iterator=True)
for chunk, chunk2 in zip(df_input,df_target):
scaler = MinMaxScaler() # Define limits for normalize data
InputX = chunk.values
InputX = scaler.fit_transform(InputX) # Normalize input data
InputY = chunk2.values
InputY = scaler.fit_transform(InputY) # Normalize output data
if gen_type =='training':
yield (InputX[0:batch_size],InputY[0:batch_size])
elif gen_type =='validation':
yield (InputX[batch_size:batch_size+valid_size],InputY[batch_size:batch_size+valid_size])
#Define model using Keras
Yclasses = 3 #Number of output classes
def nossa_metrica(y_true, y_pred):
diff = y_true - y_pred
count = K.sum(K.cast(K.equal(diff, K.zeros_like(diff)), 'int8')) # Count how many times y_true = y_pred
return count/n_train
model = keras.Sequential([
tf.keras.layers.Input(shape=(2,24,1),name='InputLayer'),
tf.keras.layers.Conv2D(filters=4,kernel_size=(2,6),strides=(1,1),activation='relu',name='ConvLayer1'),
tf.keras.layers.Conv2D(filters=4,kernel_size=(1,6),strides=(1,1),activation='relu',name='ConvLayer2'),
tf.keras.layers.Flatten(name="Flatten"),
tf.keras.layers.Dense(units = 8,activation='relu',name='FeedForward1'),
tf.keras.layers.Dense(units = Yclasses,name='OutputLayer'),
])
model.compile(loss='mse',optimizer='adam',verbose = 2,metrics = [nossa_metrica])
model.summary()
samples_per_batch = 5
train_generator= power_data_generator(batch_size=samples_per_batch,gen_type='training')
valid_generator= power_data_generator(batch_size=samples_per_batch,gen_type='validation')
number_of_batches = np.int32(n_train/(samples_per_batch+max(1,np.int32(0.2*samples_per_batch))))
#Training starts
history = model.fit(train_generator, steps_per_epoch= number_of_batches,epochs=200,validation_data=valid_generator, validation_steps=number_of_batches,verbose=2)
If anyone can shed some light here, I would be really grateful!
In
tf.keras.layers.Input(shape=(2,24,1),name='InputLayer')
you're specifying that the inputs to your model, i.e. the first argument passed to model.fit
should have shape (?, 2, 24, 1)
, but that's not what you're passing. Indeed, next(train_generator)
produces the following output:
(array([[0.62840991, 0.36867201],
[0.68026787, 0.32275764],
[0.67140497, 0.30866827],
[0.61158515, 0.32725069],
[0.57037451, 0.41795902]]),
array([[0.0301671 , 1. , 0.00581285],
[0. , 0.18781352, 0. ],
[0.12077826, 0.3356642 , 0.19676627],
[0.56275038, 0.8747475 , 0.69121483],
[1. , 0. , 1. ]]))
which is a tuple of arrays of shapes (5, 2)
and (5, 3)
respectively.
In the notebook you're referring to, they're explicitly setting up inputs of the desired shape through
InputX = np.resize(InputX,(batch_size+valid_size,24,2,1))
but that's not part of your code.