Search code examples
python-3.x scikit-learn mlp

How do I use the MLP score function? Error: shapes (295,1) and (7,450) not aligned: 1 (dim 1) != 7 (dim 0)


I have recently begun coding deep neural networks in Python and I have been stuck with this problem for weeks. I have checked other similar questions but could not grasp the solution.

I have a feed-forward neural network and I am trying to obtain the R^2 value for my model. I have a dataframe of 1031 rows by 9 columns. I am fitting the water elevation, given in the 8th or 9th column, using the first 7 columns (storm characteristics) as inputs. In the code below, 'h' is the heading of the target column that I am fitting with the first 7 columns.

See csv data at this link.

### Load dependencies
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score

### Load the dataset (raw string keeps the backslashes of the Windows path literal);
### the code below uses its first 7 columns as model inputs
data = pd.read_csv (r'C:\Users\SM\Desktop\dataframe.csv')

### Define MLP Hyper-parameters

n = 20   # Number of hidden layers
p = 450  # Number of perceptrons in each hidden layer

### One entry per hidden layer, every layer p units wide
### (kept as a list; it is converted with tuple(MLPsize) where the model is built)
MLPsize = [p] * n

Model = MLPRegressor(
    # Architecture: one hidden layer per entry of MLPsize, logistic activation
    hidden_layer_sizes=tuple(MLPsize),
    activation='logistic',
    # Optimizer and regularization
    solver='adam',
    alpha=0.0001,
    batch_size='auto',
    learning_rate_init=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
    # Documented by scikit-learn as used only with solver='sgd'
    learning_rate='adaptive',
    power_t=0.5,
    momentum=0.9,
    nesterovs_momentum=True,
    # Training-loop control
    max_iter=5000,
    tol=0.0001,
    n_iter_no_change=10,
    shuffle=False,
    warm_start=False,
    # Early stopping is off; validation_fraction is documented as used only when it is on
    early_stopping=False,
    validation_fraction=0.5,
    # Seeding and logging
    random_state=None,
    verbose=False,
)

### Define function to run train-test simulation

### Random row-wise split: a row goes to Train when its uniform draw is below 0.7,
### otherwise to Test (no seed is set in this snippet, so the split varies per run)
msk = np.random.rand(len(data)) < 0.7

### Renumber both subsets from 0 so they no longer carry the original row labels
Train = data[msk].reset_index(drop=True)
Test = data[~msk].reset_index(drop=True)

def RunModel(h):
    """Fit ``Model`` on ``Train`` for target column ``h`` and print its errors.

    The first 7 columns of ``Train``/``Test`` are the inputs and column ``h``
    is the target.  Results are published through module-level globals:
    ``Train_pred``/``Test_pred`` and ``Train_true``/``Test_true`` as
    ``(n, 1)`` column vectors, and ``Train_error``/``Test_error`` as
    one-element lists holding the mean squared error of each subset.

    NOTE: the final ``Model.score`` call is given the predictions where the
    feature matrix is expected; it is the line that raises the ``ValueError``
    this question is about.
    """
    global Train_pred, Test_pred, Train_true, Test_true
    global Train_error, Test_error
    Train_error, Test_error = [], []

    train_X, train_y = Train.iloc[:,:7], Train[h]
    test_X, test_y = Test.iloc[:,:7], Test[h]

    Model.fit(train_X, train_y)

    # Training subset: predict, record the MSE, then store column vectors
    Train_pred = Model.predict(train_X)
    Train_error.append(mean_squared_error(train_y, Train_pred))
    Train_true = np.array(train_y).reshape(-1, 1)
    Train_pred = Train_pred.reshape(-1, 1)

    # Test subset: same steps on the held-out rows
    Test_pred = Model.predict(test_X)
    Test_error.append(mean_squared_error(test_y, Test_pred))
    Test_true = np.array(test_y).reshape(-1, 1)
    Test_pred = Test_pred.reshape(-1, 1)

    print(mean_squared_error(Train_pred, Train_true))

    print(Model.score(Test_pred, Test_true))

### Run the experiment with column '11939' as the target
RunModel('11939')

And I get an error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-23-01acc869ac62> in <module>
----> 1 Model.score(Test_pred, Test_true)

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\base.py in score(self, X, y, sample_weight)
    406         from .metrics import r2_score
    407         from .metrics.regression import _check_reg_targets
--> 408         y_pred = self.predict(X)
    409         # XXX: Remove the check in 0.23
    410         y_type, _, _, _ = _check_reg_targets(y, y_pred, None)

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\extmath.py in safe_sparse_dot(a, b, dense_output)
    140         return ret
    141     else:
--> 142         return np.dot(a, b)
    143 
    144 

**ValueError: shapes (295,1) and (7,450) not aligned: 1 (dim 1) != 7 (dim 0)**

Please help. Any detailed explanation would be appreciated.


Solution

  • The line that raises the error is the following:

    print(Model.score(Test_pred, Test_true))
    

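    This is not valid because the .score() method of MLPRegressor takes (X, y) as inputs, i.e. the feature matrix and the true targets, and calls predict(X) internally. Passing Test_pred (shape (295, 1)) as X makes the network multiply a 1-column array with its first weight matrix of shape (7, 450), which is exactly the "shapes (295,1) and (7,450) not aligned" error. Read more here: https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor.score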

    So, to estimate the R² coefficient of the model, pass the test features and the test targets, which in your case are Test.iloc[:,:7] and Test_true:

    ### Load dependencies
    import pandas as pd
    import numpy as np
    from sklearn.neural_network import MLPRegressor
    from sklearn.metrics import accuracy_score, mean_squared_error
    from sklearn.model_selection import cross_val_score
    from sklearn.metrics import r2_score
    
    ### Load the dataset (raw string keeps the backslashes of the Windows path literal);
    ### the code below uses its first 7 columns as model inputs
    data = pd.read_csv (r'C:\Users\SM\Desktop\dataframe.csv')
    
    ### Define MLP Hyper-parameters
    
    n = 20   # Number of hidden layers
    p = 450  # Number of perceptrons in each hidden layer

    ### One entry per hidden layer, every layer p units wide
    ### (kept as a list; it is converted with tuple(MLPsize) where the model is built)
    MLPsize = [p] * n
    
    Model = MLPRegressor(
        # Architecture: one hidden layer per entry of MLPsize, logistic activation
        hidden_layer_sizes=tuple(MLPsize),
        activation='logistic',
        # Optimizer and regularization
        solver='adam',
        alpha=0.0001,
        batch_size='auto',
        learning_rate_init=0.001,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        # Documented by scikit-learn as used only with solver='sgd'
        learning_rate='adaptive',
        power_t=0.5,
        momentum=0.9,
        nesterovs_momentum=True,
        # Training-loop control
        max_iter=5000,
        tol=0.0001,
        n_iter_no_change=10,
        shuffle=False,
        warm_start=False,
        # Early stopping is off; validation_fraction is documented as used only when it is on
        early_stopping=False,
        validation_fraction=0.5,
        # Seeding and logging
        random_state=None,
        verbose=False,
    )
    
    ### Define function to run train-test simulation
    
    ### Random row-wise split: a row goes to Train when its uniform draw is below 0.7,
    ### otherwise to Test (no seed is set in this snippet, so the split varies per run)
    msk = np.random.rand(len(data)) < 0.7

    ### Renumber both subsets from 0 so they no longer carry the original row labels
    Train = data[msk].reset_index(drop=True)
    Test = data[~msk].reset_index(drop=True)
    
    def RunModel(h):
        """Fit ``Model`` on ``Train`` for target column ``h`` and print its scores.

        The first 7 columns of ``Train``/``Test`` are the inputs and column
        ``h`` is the target.  Results are published through module-level
        globals: ``Train_pred``/``Test_pred`` and ``Train_true``/``Test_true``
        as ``(n, 1)`` column vectors, and ``Train_error``/``Test_error`` as
        one-element lists holding the mean squared error of each subset.

        Prints the training MSE, then the value of ``Model.score`` on the
        test features and test targets.
        """
        global Train_pred, Test_pred, Train_true, Test_true
        global Train_error, Test_error
        Train_error, Test_error = [], []

        train_X, train_y = Train.iloc[:,:7], Train[h]
        test_X, test_y = Test.iloc[:,:7], Test[h]

        Model.fit(train_X, train_y)

        # Training subset: predict, record the MSE, then store column vectors
        Train_pred = Model.predict(train_X)
        Train_error.append(mean_squared_error(train_y, Train_pred))
        Train_true = np.array(train_y).reshape(-1, 1)
        Train_pred = Train_pred.reshape(-1, 1)

        # Test subset: same steps on the held-out rows
        Test_pred = Model.predict(test_X)
        Test_error.append(mean_squared_error(test_y, Test_pred))
        Test_true = np.array(test_y).reshape(-1, 1)
        Test_pred = Test_pred.reshape(-1, 1)

        print(mean_squared_error(Train_pred, Train_true))

        # score() takes the feature matrix (test_X is Test.iloc[:,:7]) and the
        # true targets, not the predictions
        print(Model.score(test_X, Test_true))
    
    ### Run the experiment with column '11939' as the target
    RunModel('11939')
    

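    Prints the training MSE followed by the test R² (a negative R² means the model fits the test set worse than always predicting the mean of the targets):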

    0.5131277608869395
    -0.02165748764016695