I'm using the following Python script to predict a number using linear regression:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from sklearn.linear_model import LinearRegression
import time
import numpy as np
from sklearn.svm import SVR
import pytz
from datetime import datetime
from sys import argv, exit
import os, psutil
################################################
# Scrape a number from a web page at regular intervals, fit a linear
# regression of value against wall-clock timestamp, and print the value
# predicted for the date/time given on the command line.
#
# Arguments: <train count> <timeout(s)> <predict date(Y/M/D)> <predict clock(H:M:S)>
if len(argv) != 5:
    print(argv[0] + ' <train count> <timeout(s)> <predict date(Y/M/D)> <predict clock(H:M:S)>')
    exit(2)

# Parse every argument up front so malformed input fails before a browser
# process is started.
train_count = int(argv[1])
delay_seconds = int(argv[2])
# Timestamps are scaled by 1e7 and truncated to int; the training samples
# below use the same scaling so both live on the same axis.
X_predict = [(int(datetime.strptime(argv[3] + " " + argv[4], '%Y/%m/%d %H:%M:%S').timestamp()*(10000000)))]
################################################
X = []
y = []
chromeOptions = webdriver.ChromeOptions()
chromeOptions.add_argument("--headless")
chromeOptions.add_argument("--remote-debugging-port=2212")
chromeOptions.add_argument('--no-sandbox')
chromeOptions.add_argument('--disable-dev-shm-usage')
# NOTE(review): the positional driver path and the `chrome_options` keyword
# are the legacy constructor form; newer Selenium releases expect
# `options=` / `service=` instead - confirm against the installed version.
driver = webdriver.Chrome('/usr/bin/chromedriver', chrome_options=chromeOptions)
try:
    driver.get('https://sample.com/')
    elem_xpath = '//div[contains(text(), "number")]/following-sibling::div'
    # range(1, n + 1) so that exactly <train count> samples are collected.
    for i in range(1, train_count + 1):
        elem = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, elem_xpath)))
        print("train => ", i)
        X.append(int(time.time()*(10000000)))
        # The page shows the number with thousands separators; strip them.
        y.append(int(elem.text.replace(',', '')))
        time.sleep(delay_seconds)
finally:
    # Always shut the browser down, even if scraping raised; quit must be
    # *called* - a bare `driver.quit` is a no-op that leaks the process.
    driver.quit()
##############################################
# scikit-learn estimators expect 2-D arrays of shape (n_samples, n_features).
X = np.array(X).reshape(-1, 1)
y = np.array(y).reshape(-1, 1)
X_predict = np.array(X_predict).reshape(-1, 1)
##############################################
svr_rbf = LinearRegression()
y_rbf = svr_rbf.fit(X, y).predict(X_predict)
# The prediction is a single-element array; .item() extracts the scalar
# without relying on implicit array-to-int conversion.
print('y_rbf: {}'.format(int(y_rbf.item())))
print('memory usage: {} MB'.format(
    int(psutil.Process(os.getpid()).memory_info().rss/1024/1024)
))
The code works well. As far as I know, a linear regression can produce several outputs, such as slope, intercept, r_value, p_value and std_err. The r_value tells you how well the linear regression fits the data. In the script above, how can I get the r_value?
It's a little hard to tell, but I think you are looking for something like this.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std
# OLS estimation on artificial data, seeded so the output is reproducible.
np.random.seed(9876789)

# Artificial data: evenly spaced points on [0, 10] used as a linear and a
# quadratic regressor.
nsample = 100
x = np.linspace(0, 10, nsample)
beta = np.array([1, 0.1, 10])
e = np.random.normal(size=nsample)

# The model needs an intercept, so a column of 1s is added to the design
# matrix before the response is generated from it.
X = sm.add_constant(np.column_stack((x, x**2)))
y = X @ beta + e

# Fit the model and print the full regression summary.
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())
Result: