Search code examples
pythonpandasalpha-vantage

Pandas can't find columns, ValueError


import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

symbols = ["AAPL", "GLD", "TSLA", "GBL", "GOOGL"]

def compare_security(symbols):
    start_date = "01-01-2019"
    end_date = "01-12-2020"
    dates = pd.date_range(start_date, end_date)

    df1 = pd.DataFrame(index=dates)
    df_SPY = pd.read_csv(
        "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=SPY&apikey=XXXX&datatype=csv",
        index_col="timestamp", usecols=["timestamp", "adjusted_close"], parse_dates=True, na_values=['nan'])
    df_SPY = df_SPY.rename(columns={"adjusted_close": "SPY"})
    df1 = df1.join(df_SPY, how="inner")

    for symbol in symbols:
        df_temp= pd.read_csv("https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={}&apikey=XXXX&datatype=csv".format(symbol),
            index_col = "timestamp", usecols = ["timestamp", "adjusted_close"], parse_dates=True, na_values=['nan'])
        df_temp = df_temp.rename(columns={"adjusted_close":symbol})
        df1 = df1.join(df_temp)
    return df1

def test_run():
    df = compare_security(symbols)
    print(df)
    df.plot()
    plt.title(symbols)
    plt.show()

if __name__ == "__main__":
    test_run()

It reads the error "ValueError: Usecols do not match columns, columns expected but not found: ['timestamp', 'adjusted_close']"

However, I checked all the files the code would retrieve and all of them have the respective columns. Any clarification as to where I went wrong would be greatly appreciated.


Solution

  • You're hitting the API limit with a standard key. The standard key is allowed 5 API calls / minute and 500 / day, that's why it works sometimes.

    You can see that if you paste your URL into your browser and refresh it 5 - 10 times in 60 seconds you'll manually hit the limit.

    You can either:

    1. Upgrade to a premium key.
    2. Space out your API calls (wait 60 seconds after you run this to run it again)

    A note on privacy that may also relate to your API threshold hitting. You have publicly shared your API key.

    1. Place your API key in an environment variable
    2. When you post, use "XXXX" or something to that affect as an API key substitute.

    If you publicly share your API key others can use it and means someone else could be using your 5 API calls / minute.

    Sample:

    import numpy as np
    import matplotlib.pyplot as plt
    import pandas as pd
    import os
    
    symbols = ["AAPL", "GLD", "TSLA", "GBL", "GOOGL"]
    
    
    def compare_security(symbols):
        start_date = "01-01-2019"
        end_date = "01-12-2020"
        dates = pd.date_range(start_date, end_date)
    
        df1 = pd.DataFrame(index=dates)
        df_SPY = pd.read_csv(
            "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=SPY&apikey={}&datatype=csv".format(
                os.getenv("ALPHAVANTAGE_API_KEY")),
            index_col="timestamp", usecols=["timestamp", "adjusted_close"], parse_dates=True, na_values=['nan'])
        df_SPY = df_SPY.rename(columns={"adjusted_close": "SPY"})
        df1 = df1.join(df_SPY, how="inner")
    
        for symbol in symbols:
    
            df_temp = pd.read_csv("https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={}&apikey={}&datatype=csv".format(symbol, os.getenv("ALPHAVANTAGE_API_KEY")),
                                  index_col="timestamp", usecols=["timestamp", "adjusted_close"], parse_dates=True, na_values=['nan'])
            df_temp = df_temp.rename(columns={"adjusted_close": symbol})
            df1 = df1.join(df_temp)
        return df1
    
    
    def test_run():
        df = compare_security(symbols)
        print(df)
        df.plot()
        plt.title(symbols)
        plt.show()
    
    
    if __name__ == "__main__":
        test_run()