Search code examples
pythonyahoo-finance

python yahoo finance format


so I did this:

def get_quotes(ticker: str, start_date: datetime.date, end_date: datetime.date) -> list:
    '''Download daily quotes for ``ticker`` from Yahoo finance.

    Builds a ``table.csv`` request for the range ``start_date``..``end_date``,
    fetches it, and splits every CSV line on commas.

    Returns a list with one entry per line of the response (header line
    included); each entry is a list holding the first five fields of that
    line as strings.

    If the request fails with ``URLError`` a message is printed and ``None``
    is returned.
    '''
    # Months are sent as ``month - 1`` (presumably the endpoint expects
    # zero-based months -- TODO confirm against the service).
    # NOTE(review): confirm that this host/endpoint is still being served.
    quote = (
        'http://ichart.yahoo.com/table.csv?s=' + ticker
        # Every parameter needs its '='; the original emitted '&a<month>',
        # so the start month was never passed as the 'a' parameter.
        + '&a=' + str(start_date.month - 1)
        + '&b=' + str(start_date.day)
        + '&c=' + str(start_date.year)
        + '&d=' + str(end_date.month - 1)
        + '&e=' + str(end_date.day)
        + '&f=' + str(end_date.year)
        + '&g=d'
    )

    # Keep the try body down to the network calls, and let the context
    # manager close the response even when read() fails.
    try:
        with urllib.request.urlopen(quote) as response:
            raw = response.read()
    except urllib.error.URLError:
        print('Page not found! Please enter a valid ticker')
        return None

    # Keep only the first five comma-separated fields of every line.
    return [line.split(',')[0:5] for line in raw.decode(encoding='utf-8').splitlines()]

But the resulting list is: [['Date', 'Open', 'High', 'Low', 'Close'], ['2011-10-10', '26.58', '26.97', '26.47',
'26.94'], ['2011-10-07', '26.34', '26.51', '26.20', '26.25'], ['2011-10-06', '25.90', '26.40', '25.70', '26.34']]

when it should be: ['Date', 'Open', 'High', 'Low', 'Close'], ['2011-10-10', '26.58', '26.97', '26.47', '26.94'], ['2011-10-07', '26.34', '26.51', '26.20', '26.25'], ['2011-10-06', '25.90', '26.40', '25.70', '26.34']

Can I eliminate the outer list (the extra pair of brackets) somehow?


Solution

  • Is this what you're looking for?

    rows = ['Date,Open,High,Low,Close,Volume,Adj Close', '2012-11-30,691.31,699.22,685.69,698.37,3163600,698.37', '2012-11-29,687.78,693.90,682.00,691.89,2776500,691.89','2012-11-28,668.01,684.91,663.89,683.67,3042000,683.67', '2012-11-27,660.17,675.00,658.00,670.71,2508700,670.71']
    
    def format_rows(rows, gap):
        """Print comma-separated rows as a table of left-aligned columns.

        rows -- iterable of strings, each holding comma-separated fields
        gap  -- number of extra spaces appended after the widest cell of
                every column (the last column is padded as well)

        Nothing is returned; one line is printed per row.
        """
        split_rows = [row.split(',') for row in rows]
        # Splits each row up, by comma
        column_lengths = [max(col_len) for col_len in zip(*[map(len, row) for row in split_rows])]
        # Finds the maximum size of each column; zip() stops at the shortest
        # row, so fields beyond that width are not printed

        for row in split_rows:
            col_lengths = zip(row, column_lengths)
            # print() as a function call: the Python 2 print statement is a
            # syntax error on Python 3, and this form prints the same text
            # on both
            print(''.join(col.ljust(col_length + gap, ' ') for (col, col_length) in col_lengths))
            # Prints out the data, making sure there's a minimum of "gap" spaces
            # between each column
    

    Doing format_rows(rows, 4) will result in the following table being printed out, with a gap of 4 spaces between each column:

    Date          Open      High      Low       Close     Volume     Adj Close
    2012-11-30    691.31    699.22    685.69    698.37    3163600    698.37
    2012-11-29    687.78    693.90    682.00    691.89    2776500    691.89
    2012-11-28    668.01    684.91    663.89    683.67    3042000    683.67
    2012-11-27    660.17    675.00    658.00    670.71    2508700    670.71
    

    You can modify the code so it returns a string instead by doing this:

    def format_rows(rows, gap):
        """Return comma-separated rows laid out as a left-aligned text table.

        rows -- iterable of strings, each holding comma-separated fields
        gap  -- number of extra spaces appended after the widest cell of
                every column (the last column is padded as well)

        The formatted lines are joined with newlines; there is no trailing
        newline.
        """
        table = [line.split(',') for line in rows]

        # Width of a column is the length of its longest cell.  zip() stops
        # at the shortest row, so extra cells in longer rows are left out.
        widths = [max(len(cell) for cell in column) for column in zip(*table)]

        return '\n'.join(
            ''.join(cell.ljust(width + gap, ' ') for cell, width in zip(cells, widths))
            for cells in table
        )
    

    Edit:

    If you want to include only the first n columns, you could use the function below and call format_rows(rows, 4, 5). In essence, we're truncating each row to its first five columns before printing it.

    def format_rows(rows, gap, limit):
        """Print the first `limit` columns of comma-separated rows as a table.

        rows  -- iterable of strings, each holding comma-separated fields
        gap   -- number of extra spaces appended after the widest cell of
                 every column (the last printed column is padded as well)
        limit -- how many leading columns to print; it is used as a slice
                 bound

        Nothing is returned; one line is printed per row.
        """
        split_rows = [row.split(',') for row in rows]
        # Splits each row up, by comma
        column_lengths = [max(col_len) for col_len in zip(*[map(len, row) for row in split_rows])]
        # Finds the maximum size of each column

        for row in split_rows:
            # zip() returns an iterator on Python 3 and cannot be sliced
            # directly, so materialise it before taking the first `limit`
            # (column, width) pairs
            col_lengths = list(zip(row, column_lengths))[:limit]
            # Prints out only the first `limit` columns

            # print() as a function call so the code also runs on Python 3
            print(''.join(col.ljust(col_length + gap, ' ') for (col, col_length) in col_lengths))
            # Prints out the data, making sure there's a minimum of "gap" spaces
            # between each column