Search code examples
python parallel-processing concurrent.futures

How to use wait to iterate and append to a dataframe


I'm trying to make sense of how to combine the data for a list of stocks into a single dataframe.

Someone said I needed to use the wait function (if I want to iterate and append the results). I think I have it set up right, but I can't even do a simple iteration:

from concurrent.futures import wait, ALL_COMPLETED

import concurrent.futures
import datetime
from datetime import timedelta
import yfinance as yf

# Ticker symbols whose price history will be downloaded.
symbols = ['GOOG', 'CSCO']

# Download window: the 104 weeks that end today.
end = datetime.date.today()
start = end - timedelta(weeks=104)

# Thread pool (at most 8 worker threads) that runs the downloads.
pool = concurrent.futures.ThreadPoolExecutor(max_workers=8)

def dl(stock):
    """Download the price history for one ticker symbol.

    Calls ``yf.download`` for ``stock`` over the module-level ``start`` /
    ``end`` window, keeps only the first five columns, and drops every row
    that contains a missing value.
    """
    history = yf.download(stock, start=start, end=end)
    first_five_columns = history.iloc[:, :5]
    return first_five_columns.dropna(axis=0, how='any')

# Submit one download task per symbol; each future resolves to dl()'s result.
futures = [pool.submit(dl, symbol) for symbol in symbols]

# Give the downloads up to 10 seconds to finish.  The return value of wait()
# is not used here: Future.result() below blocks until its task is done.
wait(futures, timeout=10, return_when=ALL_COMPLETED)

# Collect each result exactly once, in submission order.  `stocks` ends up
# as a list with one entry per symbol, not as a single combined frame.
stocks = []
for x, future in enumerate(futures):
    print(x)
    stocks.append(future.result())

print(stocks)

So, if I do the following instead:

# Gather one result per submitted future, iterating over the futures
# themselves rather than over the symbol list.
stocks = [future.result() for future in futures]

print(stocks)

It prints, but the result is two separate blocks of 502 rows each, and I want a single dataframe (i.e. 1004 rows). I was able to get this same behaviour before without using wait.


Solution

  • from concurrent.futures import wait, ALL_COMPLETED
    
    import concurrent.futures
    import datetime
    from datetime import timedelta

    import pandas as pd
    import yfinance as yf
    
    # Ticker symbols to download.
    stocks = ['GOOG', 'CSCO']

    # Date range for the download: from 104 weeks ago up to today.
    end = datetime.date.today()
    start = end - timedelta(weeks=104)

    # Executor with at most 8 worker threads for the download tasks.
    pool = concurrent.futures.ThreadPoolExecutor(max_workers=8)
    
    
    def dl(stock):
        """Fetch and clean the price history of a single ticker.

        The data comes from ``yf.download`` for the ``start``..``end`` window
        defined above.  Only the first five columns are kept, and any row
        with a missing value is removed.
        """
        raw = yf.download(stock, start=start, end=end)
        trimmed = raw.iloc[:, :5]
        return trimmed.dropna(axis=0, how='any')
    
    
    # Submit one download per symbol, then allow up to 10 seconds for them
    # to finish.  wait()'s return value is unused; Future.result() below
    # blocks until each task is done regardless.
    futures = [pool.submit(dl, symbol) for symbol in stocks]
    wait(futures, timeout=10, return_when=ALL_COMPLETED)


    # Stack all downloaded frames into one DataFrame with a single concat
    # call, instead of growing a frame inside a loop (which re-copies the
    # accumulated rows on every iteration).  Rows keep submission order.
    # Optionally pass keys=stocks to pd.concat to label rows by symbol.
    frames = [future.result() for future in futures]
    # pd.concat() raises on an empty list, so fall back to an empty frame.
    stocks_data = pd.concat(frames) if frames else pd.DataFrame()
    print(stocks_data.shape)
    (1004, 5)