Search code examples
python, machine-learning, scikit-learn, linear-regression

How can I get r_value from this linear regression?


I'm using the following Python script to predict a number using linear regression:

from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from sklearn.linear_model import LinearRegression
import time
import numpy as np
from sklearn.svm import SVR
import pytz
from datetime import datetime
from sys import argv, exit
import os, psutil

################################################
# Expect exactly four arguments after the script name:
#   <train count> <timeout(s)> <predict date(Y/M/D)> <predict clock(H:M:S)>
if len(argv) != 5:
    # Leading space in the literal keeps the script name from running into
    # the first placeholder in the usage line.
    print(argv[0] + ' <train count> <timeout(s)> <predict date(Y/M/D)> <predict clock(H:M:S)>')
    exit(2)

# Moment to predict for, as a one-element list holding the POSIX timestamp
# multiplied by 10000000 (the same scaling the training timestamps use).
# The parsed datetime is naive, so .timestamp() interprets it as local time.
X_predict = [
    int(
        datetime.strptime(argv[3] + " " + argv[4], '%Y/%m/%d %H:%M:%S').timestamp()
        * 10000000
    )
]
################################################

# Training data, filled by the scraping loop: X holds scaled timestamps,
# y holds the numbers read from the page.
X = []
y = []

# Configure Chrome to run without a visible window.
chromeOptions = webdriver.ChromeOptions()
for chrome_flag in (
    "--headless",
    "--remote-debugging-port=2212",
    "--no-sandbox",
    "--disable-dev-shm-usage",
):
    chromeOptions.add_argument(chrome_flag)

# `options=` replaces the deprecated `chrome_options=` keyword.
# NOTE(review): newer Selenium 4 releases also expect the driver path to be
# passed via a Service object instead of positionally -- confirm against the
# installed Selenium version.
driver = webdriver.Chrome('/usr/bin/chromedriver', options=chromeOptions)
driver.get('https://sample.com/')

# Selects the <div> sibling(s) following the <div> whose text contains "number".
elem_xpath = '//div[contains(text(), "number")]/following-sibling::div'

# One try/finally around the whole loop: the browser is closed exactly once,
# after the last sample or as soon as any step raises.
try:
    sample_count = int(argv[1])
    pause_seconds = int(argv[2])

    # range(1, n + 1) so that exactly <train count> samples are collected
    # (range(1, n) stopped one short).
    for i in range(1, sample_count + 1):
        # Wait up to 10 seconds for the value element to become visible.
        elem = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.XPATH, elem_xpath))
        )
        print("train => ", i)
        # Feature: current time scaled by 10000000; target: the displayed
        # number with thousands separators stripped.
        X.append(int(time.time() * 10000000))
        y.append(int(elem.text.replace(',', '')))
        time.sleep(pause_seconds)
finally:
    # quit() must actually be called; a bare `driver.quit` is a no-op and
    # leaves the Chrome process running.
    driver.quit()
        
##############################################
# Convert each flat sequence into a single-column 2-D array (shape (n, 1)).
X, y, X_predict = (
    np.array(values).reshape(-1, 1) for values in (X, y, X_predict)
)
##############################################
    
# NOTE: despite the "rbf" names, this is an ordinary LinearRegression model.
svr_rbf = LinearRegression()
y_rbf = svr_rbf.fit(X, y).predict(X_predict)

# y_rbf is an array holding a single prediction; .item() extracts it as a
# Python scalar. Calling int() directly on an array with ndim > 0 is
# deprecated in recent NumPy releases.
print('y_rbf: {}'.format(int(y_rbf.item())))

# Resident set size of this process, converted from bytes to whole MB.
print('memory usage: {} MB'.format(
    int(psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024)
))

The code works well. As far as I know, a linear regression can produce several outputs, such as slope, intercept, r_value, p_value and std_err. The r_value tells you the accuracy of the linear regression. In the script above, how can I get the value of r_value?


Solution

  • It's a little hard to tell, but I think you are looking for something like this.

    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    
    import statsmodels.api as sm
    from statsmodels.sandbox.regression.predstd import wls_prediction_std
    
    # Fixed seed so the random noise (and therefore the fit) is reproducible.
    np.random.seed(9876789)
    
    # OLS estimation
    # Artificial data: a quadratic in x plus standard-normal noise.
    
    nsample = 100
    # Use nsample here too, so x and the noise vector e always have the same length.
    x = np.linspace(0, 10, nsample)
    X = np.column_stack((x, x**2))
    # True coefficients, in the column order of X after add_constant below:
    # [constant, x, x**2].
    beta = np.array([1, 0.1, 10])
    e = np.random.normal(size=nsample)
    
    # Our model needs an intercept so we add a column of 1s:
    X = sm.add_constant(X)
    y = np.dot(X, beta) + e
    
    # Fit and summary (the summary table includes R-squared):
    model = sm.OLS(y, X)
    results = model.fit()
    print(results.summary())
    

    Result:

    enter image description here