Search code examples
python tensorflow keras

KerasClassifier error when trying to run an LSTM model


I'm trying to import KerasClassifier in VS Code, but I keep getting this error:

ModuleNotFoundError Traceback (most recent call last) Cell In[7], line 1 1 from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

ModuleNotFoundError: No module named 'tensorflow.keras.wrappers'

Does anyone know a way to fix it or another way around it? Here's my full code for context:

import pandas as pd
import re
from sklearn.model_selection import GridSearchCV
import numpy as np
import datetime
from pandas.tseries.offsets import MonthEnd
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# Read the raw billing export into a DataFrame.
df = pd.read_csv('Billing_Report_w_Revshare Calc.csv')

# Parse 'Date' into timestamps, then move every June date to the 30th;
# dates in any other month pass through unchanged.
df['Date'] = pd.to_datetime(df['Date']).apply(
    lambda ts: ts.replace(day=30) if ts.month == 6 else ts
)

# Rows that have no 'Domain' value are discarded.
df = df.dropna(subset=['Domain'])

def modify_url_suffix(url):
    """Rewrite the trailing ``.<letters>`` suffix of *url* to ``.com``.

    The match is case-insensitive and only applies to a purely alphabetic
    suffix at the very end of the string. A suffix is left untouched when
    its letters *begin with* one of ``net``, ``co``, ``org``, ``news``,
    ``tv``, ``pr``, ``us`` or ``edu`` (the exclusion is a prefix check, so
    ``.com`` and ``.network`` are also left alone).

    Args:
        url: The string whose suffix should be normalised.

    Returns:
        The string with its suffix replaced by ``.com``, or the input
        unchanged when nothing matches.
    """
    suffix_re = re.compile(r'\.(?!net|co|org|news|tv|pr|us|edu)([a-z]+)$', flags=re.I)
    return suffix_re.sub('.com', url)

# Normalise the suffix of every domain.
df['Domain'] = df['Domain'].apply(modify_url_suffix)

# Keep only the grouping keys and the metric columns.
key_columns = ['Date', 'Domain', 'DFP Group Code', 'Program Code']
metric_columns = [
    'Total Agency Ad Requests',
    'Total Agency Impressions',
    'Total DFP Impressions',
    'Reporting Impressions',
    'Total Rev',
    'Total Active View eligible impressions',
    'Total Active View measurable impressions',
    'Total Active View viewable impressions',
    'Customer Rev Share',
]
df = df[key_columns + metric_columns]

# Only rows dated after 2017-04-01 are retained.
df = df[df['Date'] > '2017-04-01']

# Rev share is stored as its absolute value before aggregation.
df['Customer Rev Share'] = df['Customer Rev Share'].abs()

# Sum the metrics for each (Date, Domain, DFP Group Code, Program Code) combination.
df = df.groupby(key_columns).sum().reset_index()

def calculate_net_cpm(row):
    """Return the net CPM for one row, rounded to two decimals.

    The value is ``row['Total Rev'] * 1000 / row['Total Agency Impressions']``
    multiplied by 0.78.

    Args:
        row: Mapping-like object (for example a DataFrame row) that exposes
            the 'Total Rev' and 'Total Agency Impressions' keys.

    Returns:
        The rounded net CPM, or 0 when it cannot be computed: a key is
        missing, a value is not numeric, the impression count is zero, or
        the result is not a finite number.
    """
    try:
        # NumPy scalars do not raise on division by zero; they yield inf/nan
        # together with a RuntimeWarning. Silence the warning here and reject
        # non-finite results explicitly below.
        with np.errstate(divide='ignore', invalid='ignore'):
            net_cpm = (row['Total Rev'] * 1000 / row['Total Agency Impressions']) * 0.78
        if not np.isfinite(net_cpm):
            return 0
        return round(net_cpm, 2)
    except (ArithmeticError, KeyError, TypeError, ValueError):
        # Best-effort fallback: bad or missing data counts as a CPM of 0.
        return 0

# Attach the per-row net CPM.
df['net_cpm'] = df.apply(calculate_net_cpm, axis=1)

# Work out which calendar month ended most recently relative to today.
last_day_previous_month = pd.to_datetime('today') - MonthEnd(1)
previous_month_year, previous_month_month = (
    last_day_previous_month.year,
    last_day_previous_month.month,
)

# Keep only domains that have at least one row in that month.
in_previous_month = (
    (df['Date'].dt.year == previous_month_year)
    & (df['Date'].dt.month == previous_month_month)
)
df_previous_month = df[in_previous_month]
previous_month_domains = df_previous_month['Domain'].unique()
df = df[df['Domain'].isin(previous_month_domains)]

# Of those, keep only domains with a 'YUP' program row in that same month.
# The masks are rebuilt because df was just re-filtered.
is_yup = df['Program Code'] == 'YUP'
in_previous_month = (
    (df['Date'].dt.year == previous_month_year)
    & (df['Date'].dt.month == previous_month_month)
)
yup_df = df.loc[is_yup & in_previous_month, ['Domain']]
df = df[df['Domain'].isin(yup_df['Domain'])]

# Order rows by domain, then chronologically within each domain.
df = df.sort_values(['Domain', 'Date'])

# Drop 'DFP Group Code' and 'Program Code'; keep the identifiers, the
# metric columns and the derived 'net_cpm'.
df = df[['Date', 'Domain', 'Total Agency Ad Requests',
         'Total Agency Impressions', 'Total DFP Impressions', 'Reporting Impressions', 'Total Rev',
         'Total Active View eligible impressions', 'Total Active View measurable impressions',
         'Total Active View viewable impressions', 'Customer Rev Share', 'net_cpm']]

# Split domains by row count: more than 4 rows counts as "sufficient".
count_per_domain = df['Domain'].value_counts()
sufficient_data_domains = count_per_domain[count_per_domain > 4].index
df_sufficient_data = df[df['Domain'].isin(sufficient_data_domains)]
# The complement is kept for reference; nothing in the visible code reads it.
df_insufficient_data = df[~df['Domain'].isin(sufficient_data_domains)]

df_sufficient_data = df_sufficient_data.sort_values(['Domain', 'Date'], ascending=[True, True])

# One scaler is fitted across all sufficient-data rows (not per domain).
feature_scaler = MinMaxScaler()
numeric_columns = ['Total Agency Ad Requests', 'Total Agency Impressions', 'Total DFP Impressions', 
                   'Reporting Impressions', 'Total Rev', 'Total Active View eligible impressions', 
                   'Total Active View measurable impressions', 'Total Active View viewable impressions', 
                   'Customer Rev Share']

# The numeric columns are overwritten in place with their scaled values, so
# df_sufficient_data holds scaled features from this point on. 'net_cpm' is
# not in numeric_columns and stays unscaled.
df_sufficient_data[numeric_columns] = feature_scaler.fit_transform(df_sufficient_data[numeric_columns])

# Number of consecutive rows that make up one input window.
time_steps = 5

def reshape_to_3d(arr, time_steps):
    """Stack overlapping windows of *arr* into a single array.

    Window ``i`` is ``arr[i:i + time_steps]`` for every ``i`` in
    ``range(len(arr) - time_steps)``. The last full-length window (the one
    ending on the final row) is therefore not produced.

    Args:
        arr: Sequence or array to slice along its first axis.
        time_steps: Length of each window.

    Returns:
        ``np.array`` of the windows. When ``len(arr) <= time_steps`` there
        are no windows and the result is an empty 1-D array.
    """
    window_count = len(arr) - time_steps
    windows = [arr[start:start + time_steps] for start in range(window_count)]
    return np.array(windows)

# Per-domain input windows (X) and their one-hot targets (y).
X = []
y = []

# Every distinct net_cpm value is treated as its own class label.
encoder = LabelEncoder()
encoder.fit(df_sufficient_data['net_cpm'])
num_classes = len(encoder.classes_)

for domain in df_sufficient_data['Domain'].unique():
    domain_data = df_sufficient_data[df_sufficient_data['Domain'] == domain]
    domain_data = domain_data.drop(['Date', 'Domain'], axis=1)

    # The numeric columns were already scaled in place when feature_scaler
    # was fitted on df_sufficient_data, so they are used as-is here.
    # Running feature_scaler.transform on them again would scale twice.

    encoded_y = encoder.transform(domain_data['net_cpm'])
    dummy_y = to_categorical(encoded_y, num_classes=num_classes)

    X_domain = domain_data.drop('net_cpm', axis=1).values

    # Windows are built per domain so they never span two domains.
    X_domain_3d = reshape_to_3d(X_domain, time_steps)
    # The target for window i is the row right after it (index i + time_steps).
    y_domain = dummy_y[time_steps:]

    # A domain with no complete window yields a non-3-D array and is skipped.
    if X_domain_3d.ndim == 3 and y_domain.ndim == 2:
        X.append(X_domain_3d)
        y.append(y_domain)

if not X:
    # np.concatenate would raise a less helpful ValueError on an empty list.
    raise ValueError(
        "No domain produced a complete window; each domain needs more than "
        "time_steps rows."
    )

X = np.concatenate(X)
y = np.concatenate(y)

print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

def create_model(lstm_units=50, learning_rate=0.01, input_shape=(5, 9), num_classes=580):
    """Build and compile a single-layer LSTM classifier.

    Args:
        lstm_units: Number of units in the LSTM layer.
        learning_rate: Learning rate passed to the Adam optimizer.
        input_shape: ``(time_steps, n_features)`` of one input sample. The
            default ``(5, 9)`` is the value previously hard-coded here.
        num_classes: Width of the softmax output layer. The default 580 is
            the value previously hard-coded here. It must equal the number
            of columns in the one-hot targets the model is trained on.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        accuracy metric).
    """
    model = Sequential()
    model.add(LSTM(lstm_units, activation='relu', input_shape=input_shape))
    model.add(Dense(num_classes, activation='softmax'))

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

# Wrap the model factory so scikit-learn's search utilities can drive it.
model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=32, verbose=1)

# Hyper-parameter combinations to evaluate (3 x 3 grid).
param_grid = {
    'lstm_units': [30, 50, 70],
    'learning_rate': [0.001, 0.01, 0.1],
}

# 3-fold cross-validated search using all available cores.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, y)

best_params = grid_result.best_params_
print("Best: %f using %s" % (grid_result.best_score_, best_params))

# Rebuild a fresh model with the winning settings and train it on all data,
# holding out 20% for validation.
final_model = create_model(
    lstm_units=best_params['lstm_units'],
    learning_rate=best_params['learning_rate'],
)

final_model.fit(X, y, epochs=10, batch_size=32, validation_split=0.2)

I've tried creating my own custom wrapper, but I'm having issues with that.


Solution

  • A version mismatch between Keras and TensorFlow is the most likely cause; refer here. Because you have not said which platform and library versions you are using, I can't confirm this, so I encourage you to add those details to the question so that a precise answer can be given.

    Answer: Make sure the installed Keras version matches your TensorFlow version.