Search code examples
python datetime annotations

Annotating a date on a chart


I am trying to add a straight vertical line to the chart, with the date printed vertically along the line. I have added a picture below of what I am trying to accomplish. I have also included the code with which I am trying to add the annotation.

My code:

import pandas as pd
from pandas import datetime
from pandas import DataFrame as df
import matplotlib
from pandas_datareader import data as web
import matplotlib.pyplot as plt
import datetime
import numpy as np

# Two date windows that share the same end date (today): one starting in 2015
# and one starting in 2019.
start = datetime.date(2015,1,1)
end = datetime.date.today()
start1 = datetime.date(2019,1,1)

# Download "^GSPC" quotes from the 'yahoo' source for each window.
data = web.DataReader("^GSPC", 'yahoo',start, end)
data1 = web.DataReader("^GSPC", 'yahoo', start1, end)

# Make sure both indexes are datetimes.
data.index = pd.to_datetime(data.index, format ='%Y-%m-%d')
data1.index = pd.to_datetime(data1.index, format ='%Y-%m-%d')

# Reindex both frames onto every calendar day between `start` and `end`.
# NOTE(review): data1 was downloaded from `start1` (2019) but is reindexed from
# `start` (2015), so its leading rows are presumably all-NaN fillers - verify.
full_dates = pd.date_range(start, end)
data = data.reindex(full_dates)
data1 = data1.reindex(full_dates)

# Append 'month', 'week' and 'day' as additional index levels.
# NOTE(review): nothing in this snippet creates 'month', 'week' or 'day'
# columns before they are used here - confirm they exist on the real data.
data.set_index('month',append=True,inplace=True)
data1.set_index('month',append=True,inplace=True)
data.set_index('week',append=True,inplace=True)
data1.set_index('week',append=True,inplace=True)
data.set_index('day',append=True,inplace=True)
data1.set_index('day',append=True,inplace=True)

# Row-to-row relative change of the adjusted close.
data['pct_day']= data['Adj Close'].pct_change()
data1['pct_day']= data1['Adj Close'].pct_change()

# Average every column per (month, day) pair; the resulting frames are indexed
# by (month, day) rather than by date.
df = data.groupby(['month', 'day']).mean()
df2 = data1.groupby(['month', 'day']).mean()

# Running total of the averaged daily changes.
df['cumsum_pct_day']=df['pct_day'].cumsum(axis = 0)
df2['cumsum_pct_day']=df2['pct_day'].cumsum(axis = 0)

# Draw both cumulative curves on the same axes.
ax = df.plot(y='cumsum_pct_day', grid = True, label='df')
df2.plot(y='cumsum_pct_day', grid= True, ax=ax, label='df2')

# Try to draw a dashed vertical line at `end`, from the lower y-limit up to the
# first Close value, and label it with the date rotated by 90 degrees; the
# original y-limits are restored afterwards.
# NOTE(review): `end` is a datetime.date, while the plotted frames are indexed
# by (month, day) after the groupby, so this x value is presumably not on the
# same scale as the x-axis - likely why the line does not show up.
ylims = ax.get_ylim()
ax.vlines(end, ylims[0], data1.Close[0], linestyles='--')
ax.text(end, data1.Close[0], end, ha='right', va='top', rotation=90)
ax.set_ylim(ylims)


plt.show()

enter image description here

For some reason I am getting a warning:

enter image description here

However, the line is not being plotted. Could you advise why?


Solution

  • Instead of vlines, which plots multiple lines, you can use the axvline method, which adds a single vertical line:

    from datetime import timedelta
    
    def gen_df(size):
        """Return a demo DataFrame with ``size`` rows.

        ``col1`` holds consecutive daily timestamps starting on 2018-01-01 and
        ``col2`` holds the running total of exponentially distributed random
        draws (taken from NumPy's global random state).
        """
        dates = pd.date_range(start='1/1/2018', periods=size)
        draws = np.random.exponential(size=size)
        running_total = np.cumsum(draws)
        return pd.DataFrame({'col1': dates, 'col2': running_total})
    
    df1 = gen_df(60)
    df2 = gen_df(50)

    # Plot both series against their dates on one set of axes.
    ax = df1.plot(x='col1', y='col2', label='df1')
    df2.plot(x='col1', y='col2', ax=ax, label='df2')

    # Mark the last date of df2 with a single vertical line.
    last_date = df2['col1'].max()
    ax.axvline(x=last_date, color='red')

    # The label text is passed positionally: its keyword was ``s`` before
    # Matplotlib 3.3 and ``text`` afterwards, so ``s=...`` fails on current
    # releases while a positional argument works on both.
    # The label is anchored two days to the left of the line, at y=35.
    ax.annotate(str(last_date.date()), xy=(last_date - timedelta(days=2), 35), rotation=90)
    

    enter image description here

    Let's reproduce your data set:

    def gen_df(size):
        """Return a one-column demo frame indexed by ``size`` consecutive days.

        The index starts on 2018-01-01 and the single ``'Adj Close'`` column
        holds standard-exponential random draws (NumPy's global random state).
        """
        day_index = pd.date_range(start='1/1/2018', periods=size)
        prices = np.random.standard_exponential(size)
        return pd.DataFrame(data={'Adj Close': prices}, index=day_index)

    # Two frames of different lengths.
    df1 = gen_df(150)
    df2 = gen_df(130)

    print(df1.head())
    

    Output:

                Adj Close
    2018-01-01   0.061166
    2018-01-02   0.669330
    2018-01-03   0.123332
    2018-01-04   0.029007
    2018-01-05   1.024210
    

    for df in [df1, df2]:
        # Calendar components of each timestamp in the index.
        df['year'] = df.index.year
        df['month'] = df.index.month
        # DatetimeIndex.week was deprecated in pandas 1.1 and removed in 2.0;
        # isocalendar().week (pandas >= 1.1) yields the same ISO week number.
        df['week'] = df.index.isocalendar().week.astype(int)
        df['day'] = df.index.day
        # Append month, week and day (in that order) as extra index levels
        # after the date; 'year' stays a regular column.
        df.set_index(['month', 'week', 'day'], append=True, inplace=True)
        # Row-to-row relative change; the first row has no predecessor, so NaN.
        df['pct_day'] = df['Adj Close'].pct_change()

    print(df1.head())
    

    Output:

                               Adj Close  year    pct_day
               month week day                            
    2018-01-01 1     1    1     0.061166  2018        NaN
    2018-01-02 1     1    2     0.669330  2018   9.942917
    2018-01-03 1     1    3     0.123332  2018  -0.815739
    2018-01-04 1     1    4     0.029007  2018  -0.764804
    2018-01-05 1     1    5     1.024210  2018  34.308892
    

    # Average every column per (month, day) pair, then accumulate the averaged
    # daily changes into a running total for each frame.
    group_keys = ['month', 'day']

    df1 = df1.groupby(group_keys).mean()
    df2 = df2.groupby(group_keys).mean()

    df1['cumsum_pct_day'] = df1['pct_day'].cumsum()
    df2['cumsum_pct_day'] = df2['pct_day'].cumsum()

    print(df1.head())
    

    Output:

               Adj Close  year    pct_day  cumsum_pct_day
    month day                                            
    1     1     0.061166  2018        NaN             NaN
          2     0.669330  2018   9.942917        9.942917
          3     0.123332  2018  -0.815739        9.127178
          4     0.029007  2018  -0.764804        8.362375
          5     1.024210  2018  34.308892       42.671267
    

    Add a single line with axvline:

    # Draw both cumulative curves on one set of axes.
    ax = df1.plot(y='cumsum_pct_day', label='df1')
    df2.plot(y='cumsum_pct_day', ax=ax, label='df2')

    # The line is placed by row count, not by date: x is the length of the
    # shorter of the two frames.
    df = df1 if len(df1) < len(df2) else df2  # the shorter DataFrame
    ax.axvline(x=len(df), color='red')

    y_min, y_max = ax.get_ylim()
    # Midpoint of the visible y-range. (y_max - y_min) / 2 is only half the
    # height of the range and lands off-centre whenever y_min is not zero.
    middle = (y_min + y_max) / 2
    # The label text is passed positionally: its keyword was ``s`` before
    # Matplotlib 3.3 and ``text`` afterwards, so ``s=...`` fails on current
    # releases. The label is the largest (month, day) index entry, anchored
    # five positions to the left of the line.
    ax.annotate(str(df.index.max()), xy=(len(df) - 5, middle), rotation=90)
    

    axvline

    Add multiple lines with vlines:

    # Draw both cumulative curves on one set of axes.
    ax = df1.plot(y='cumsum_pct_day', label='df1')
    df2.plot(y='cumsum_pct_day', ax=ax, label='df2')

    # Many closely spaced vertical lines (every 0.1 along x) spanning the full
    # y-range, from the length of df2 up to the length of df1.
    y_min, y_max = ax.get_ylim()
    ax.vlines(x=np.arange(len(df2), len(df1), step=.1), ymin=y_min, ymax=y_max, color='red')

    # Defined here so the snippet does not depend on an earlier one.
    df = df1 if len(df1) < len(df2) else df2  # the shorter DataFrame
    # Midpoint of the y-range. (y_max - y_min) / 2 is only half the height of
    # the range and lands off-centre whenever y_min is not zero.
    middle = (y_min + y_max) / 2
    # The label text is passed positionally: its keyword was ``s`` before
    # Matplotlib 3.3 and ``text`` afterwards, so ``s=...`` fails on current
    # releases.
    ax.annotate(str(df.index.max()), xy=(len(df) - 5, middle), rotation=90)
    

    enter image description here