Search code examples
python matplotlib linear-regression curve

How to draw the linear regression curve


I have just started learning Python and am wondering how I can draw a linear regression curve for a time series of price data (for example, closing prices, where only the y values are given).

import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
from datetime import datetime

# Request window: 1 January 2015 through 31 December 2015.
start = datetime(year=2015, month=1, day=1)
end = datetime(year=2015, month=12, day=31)

# Download the AMZN price table from the "yahoo" source and keep only
# the 'Close' column as the series to be regressed.
df = web.DataReader(name="AMZN", data_source="yahoo", start=start, end=end)
close = df.loc[:, 'Close']

I referred to this web page to grasp the basic idea of drawing a linear regression curve, but I don't know which functions to use to reproduce it in Python.


Solution

  • Try this:

    import pandas_datareader.data as web
    from datetime import datetime
    import numpy as np
    import matplotlib.pyplot as plt
    import statsmodels.api as sm
    
    # Download AMZN prices from the "yahoo" source for calendar year 2015.
    start = datetime(year=2015, month=1, day=1)
    end = datetime(year=2015, month=12, day=31)
    df = web.DataReader("AMZN", "yahoo", start, end)

    # Regressor: the day-of-year of each row's timestamp.
    df['day'] = df.index.map(lambda ts: ts.dayofyear)

    # Ordinary least squares of Close on day; add_constant supplies the
    # intercept column ('const') alongside 'day'.
    days = df['day']
    y = df['Close']
    X = sm.add_constant(days)
    est = sm.OLS(y, X).fit()

    # Evaluate the fitted line at 100 evenly spaced points spanning the
    # observed day range.
    day_grid = np.linspace(days.min(), days.max(), 100)
    y_hat = est.predict(sm.add_constant(day_grid))

    # Draw the fitted line, then the raw observations, and show the figure.
    plt.plot(day_grid, y_hat)
    plt.scatter(days, y)
    plt.show()
    

    enter image description here

    Running est.summary() prints:

                                OLS Regression Results                            
    ==============================================================================
    Dep. Variable:                  Close   R-squared:                       0.935
    Model:                            OLS   Adj. R-squared:                  0.934
    Method:                 Least Squares   F-statistic:                     3570.
    Date:                Mon, 05 Dec 2016   Prob (F-statistic):          5.06e-150
    Time:                        00:27:53   Log-Likelihood:                -1199.8
    No. Observations:                 252   AIC:                             2404.
    Df Residuals:                     250   BIC:                             2411.
    Df Model:                           1                                         
    Covariance Type:            nonrobust                                         
    ==============================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
    ------------------------------------------------------------------------------
    const        289.9491      3.622     80.053      0.000       282.816   297.083
    day            1.0212      0.017     59.748      0.000         0.988     1.055
    ==============================================================================
    Omnibus:                       15.313   Durbin-Watson:                   0.117
    Prob(Omnibus):                  0.000   Jarque-Bera (JB):                6.134
    Skew:                           0.007   Prob(JB):                       0.0466
    Kurtosis:                       2.236   Cond. No.                         429.
    ==============================================================================
    

    Another example, this time building the design matrices from a patsy formula:

    import pandas_datareader.data as web
    from datetime import datetime
    import statsmodels.api as sm
    from patsy.highlevel import dmatrices
    
    import matplotlib.pyplot as plt
    
    # Download AMZN prices from the "yahoo" source for calendar year 2015.
    start = datetime(2015, 1, 1)
    end = datetime(2015, 12, 31)

    df = web.DataReader("AMZN", "yahoo", start, end)

    # Day-of-year regressor, read straight from the datetime index
    # instead of mapping a lambda over every timestamp.
    df['day'] = df.index.dayofyear

    # Build the response and design matrices from the formula; patsy adds
    # the 'Intercept' column automatically.
    y, X = dmatrices('Close ~ day', data=df, return_type='dataframe')

    mod = sm.OLS(y, X)

    res = mod.fit()

    # Rainbow test for linearity. It returns (F statistic, p-value); print
    # them so the result is visible when this runs as a script rather than
    # in an interactive session.
    rainbow_f, rainbow_p = sm.stats.linear_rainbow(res)
    print("Rainbow test: F = {:.4f}, p-value = {:.4g}".format(rainbow_f, rainbow_p))

    # Regression diagnostic plots for the 'day' regressor.
    sm.graphics.plot_regress_exog(res, "day")
    plt.show()
    

    enter image description here

    Changing sm.graphics.plot_regress_exog(res, "day") to sm.graphics.plot_fit(res, "day") produces this plot instead:

    enter image description here

    Running res.summary() prints:

                                OLS Regression Results                            
    ==============================================================================
    Dep. Variable:                  Close   R-squared:                       0.935
    Model:                            OLS   Adj. R-squared:                  0.934
    Method:                 Least Squares   F-statistic:                     3570.
    Date:                Mon, 05 Dec 2016   Prob (F-statistic):          5.06e-150
    Time:                        00:26:04   Log-Likelihood:                -1199.8
    No. Observations:                 252   AIC:                             2404.
    Df Residuals:                     250   BIC:                             2411.
    Df Model:                           1                                         
    Covariance Type:            nonrobust                                         
    ==============================================================================
                     coef    std err          t      P>|t|      [95.0% Conf. Int.]
    ------------------------------------------------------------------------------
    Intercept    289.9491      3.622     80.053      0.000       282.816   297.083
    day            1.0212      0.017     59.748      0.000         0.988     1.055
    ==============================================================================
    Omnibus:                       15.313   Durbin-Watson:                   0.117
    Prob(Omnibus):                  0.000   Jarque-Bera (JB):                6.134
    Skew:                           0.007   Prob(JB):                       0.0466
    Kurtosis:                       2.236   Cond. No.                         429.
    ==============================================================================