Search code examples
python, parsing, yahoo-finance

parsing yahoo finance python httperror 502


I have the following code for downloading Yahoo Finance pages as a backup, which I am running over the S&P 500. It stops with the error "HTTP Error 502: Server Hangup" after only 20 stocks. Does anyone know a better way to parse Yahoo Finance, or how to fix this problem?

import time

# Back up seven Yahoo Finance pages (key statistics plus annual/quarterly
# income statement, balance sheet and cash flow) for every ticker in `sp500`.
#
# Each page is saved as <location>\<folder>\<ticker>\<timestamp>.html.
# Entries are (folder, page code, query suffix); the folder names are kept
# exactly as the existing directory layout spells them.
_PAGES = [
    ('_KeyStats', 'ks', ''),
    ('_AnnualEarnings', 'is', '&annual'),
    ('_QuarterlyEarnings', 'is', ''),
    ('_AnnaulBS', 'bs', '&annual'),
    ('_QuarterlyBS', 'bs', ''),
    ('_AnnaulCF', 'cf', '&annual'),
    ('_QuarterlyCF', 'cf', ''),
]
_MAX_ATTEMPTS = 3      # tries per page before giving up on the stock
_PAUSE_SECONDS = 1     # delay between requests so the server is not flooded


def _fetch_with_retry(url):
    """Return the body of `url`, retrying on HTTP/URL errors.

    Waits a little longer after each failed attempt and re-raises the last
    error once `_MAX_ATTEMPTS` attempts have failed.
    """
    for attempt in range(1, _MAX_ATTEMPTS + 1):
        try:
            return urllib2.urlopen(url).read()
        except urllib2.URLError:
            # HTTPError (e.g. 502 Server Hangup) is a subclass of URLError.
            if attempt == _MAX_ATTEMPTS:
                raise
            time.sleep(_PAUSE_SECONDS * attempt)


for stock in sp500:
    # The try is per stock: one bad ticker or a persistent server error must
    # not abort the backup of all remaining stocks.
    try:
        for folder, page, suffix in _PAGES:
            url = 'https://au.finance.yahoo.com/q/' + page + '?s=' + stock + suffix

            # Download first, so a failed request never leaves an empty file.
            html = _fetch_with_retry(url)

            save_path = location + '\\' + folder + '\\' + stock
            name_of_file = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
            completeName = os.path.join(save_path, name_of_file + ".html")
            # `with` closes the file even if the write fails.
            with open(completeName, "w") as file1:
                file1.write(html)

            time.sleep(_PAUSE_SECONDS)
        print(stock)
    except Exception as e:
        print('failed main loop ' + str(e))

Solution

  • You should use pandas for this. Imagine you have a file with all stocks:

    sp500.txt

    AAPL
    GLD
    SPX
    MCD
    

    Now you can do:

    from pandas.io.data import DataReader
    from pandas import Panel, DataFrame
    import datetime
    
    # Date range of the price history to request.
    start = datetime.datetime(2010, 1, 1)
    end = datetime.datetime(2013, 1, 27)

    # sp500.txt holds one ticker per line, e.g. ['AAPL', 'GLD', 'SPX', 'MCD'].
    with open('sp500.txt') as f:
        symbols = f.read().splitlines()

    # Fetch one price table per ticker from Yahoo; pause=1 is passed through
    # to DataReader for every request.
    data = {
        symbol: DataReader(symbol, "yahoo", start, end, pause=1)
        for symbol in symbols
    }

    # Swap the axes so the panel is indexed by price field rather than by
    # ticker, then take the 'Close' field and drop rows with missing values.
    panel = Panel(data).swapaxes('items', 'minor')
    close_prices = panel['Close']
    closing = close_prices.dropna()
    print(closing.head())
    

    Output:

                AAPL    GLD     MCD     SPX
    Date                
    2010-01-04  214.01  109.80  62.78   0.03
    2010-01-05  214.38  109.70  62.30   0.03
    2010-01-06  210.97  111.51  61.45   0.03
    2010-01-07  210.58  110.82  61.90   0.03
    2010-01-08  211.98  111.37  61.84   0.04
    

    Note the pause=1 in the DataReader call to avoid hitting the API limit. If you want to save the results to a file you can use:

    # Save the `closing` table to output.csv (path is relative to the current
    # working directory).
    closing.to_csv('output.csv')