Search code examples
python pandas scikit-learn sklearn-pandas

How to avoid Collection Error Python Numpy


I am trying to train a linear regression model to extend a graph. I have a couple of thousand lines of data in my CSV file that I import into NumPy arrays. Here is my code:

import pandas as pd 
import numpy as np 
from matplotlib import pyplot as plt 
import csv
import math
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

def predict():
    """Load the CSV, split it into train/test sets, and fit a LinearRegression.

    NOTE: this is the version as posted in the question and it does not run
    to completion; the comments below mark the lines responsible.
    """
    sample_data = pd.read_csv("includes\\csv.csv")
    x = np.array(sample_data["day"])
    y = np.array(sample_data["balance"])

    for x in x:
        x = x.reshape(1, -1)
        # The loop variable reuses the name `x`, so once the loop finishes
        # `x` no longer refers to the whole "day" column: it holds only the
        # last element, reshaped.

    for y in y:
        y.reshape(1, -1)
        # The reshape result is discarded and the loop rebinds `y` to each
        # element in turn, so after the loop `y` is a single value rather
        # than the "balance" column.

    # `y` is a single value at this point; this appears to be the call that
    # raises the "Singleton array ... cannot be considered a valid
    # collection" TypeError reported for this code.
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

    clf = LinearRegression()
    # NOTE(review): `x_train` / `x_test` (lowercase) are not the names bound
    # by the split above (`X_train` / `X_test`); unless they are defined
    # elsewhere, these two lines would raise NameError once reached.
    clf.fit(x_train, y_train)
    clf.score(x_test, y_test)

When I run this, the error is:

TypeError: Singleton array 6014651 cannot be considered a valid collection.

Any ideas why that's a thing?


Solution

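  • After discussion in comments: the `for x in x` / `for y in y` loops rebind `x` and `y` to individual elements, so `train_test_split` ends up receiving a single value instead of an array. Reshape each whole array once with `reshape(-1, 1)`, and use the same capitalisation (`X_train`, `X_test`) in `fit` and `score` as in the split: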

    import pandas as pd 
    import numpy as np 
    from matplotlib import pyplot as plt 
    import csv
    import math
    from sklearn import preprocessing, svm
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LinearRegression
    
    def predict(csv_path="includes\\csv.csv", test_size=0.2):
        """Fit a linear regression of ``balance`` on ``day`` and return its test score.

        Args:
            csv_path: Path of the CSV file to load. It must contain ``day`` and
                ``balance`` columns. Defaults to the path used originally.
            test_size: Value forwarded to ``train_test_split`` as ``test_size``
                (the share of rows held out for scoring).

        Returns:
            The value of ``LinearRegression.score`` on the held-out rows
            (the coefficient of determination, R^2, per the scikit-learn docs).
        """
        sample_data = pd.read_csv(csv_path)

        # Reshape each whole column once into a single-column 2-D array;
        # scikit-learn estimators expect the feature matrix to be 2-D
        # (n_samples, n_features).
        x = np.array(sample_data["day"]).reshape(-1, 1)
        y = np.array(sample_data["balance"]).reshape(-1, 1)

        X_train, X_test, y_train, y_test = train_test_split(
            x, y, test_size=test_size
        )

        clf = LinearRegression()
        clf.fit(X_train, y_train)

        # Return the score instead of discarding it, so the caller can
        # actually see how well the fit performs.
        return clf.score(X_test, y_test)