I am trying to figure out how to use my own CSV data files (originally from Yahoo Finance) within Zipline. I know you need to load the CSV file into a pandas DataFrame, but I can't seem to stop Zipline from downloading the data from Yahoo.
My csv file format:
Date, Open, High, Low, Close, Volume, AdjClose
My algofile:
from zipline.api import order, record, symbol
import pandas as pd
# Loads AAPL.csv into a module-level DataFrame. Nothing below reads this
# variable: the `data` parameter of handle_data shadows it inside that function.
data = pd.read_csv('AAPL.csv')
def initialize(context):
    """Set-up hook for the algorithm; deliberately does nothing.

    `context` is accepted but not touched, so no state is prepared here.
    """
    return None
def handle_data(context, data):
    """Order 10 shares of AAPL and record its current price.

    `data` is the per-bar object passed in by the caller; it is indexed by
    the AAPL symbol to read the `price` attribute that gets recorded.
    """
    aapl = symbol('AAPL')
    order(aapl, 10)
    aapl_price = data[aapl].price
    record(AAPL=aapl_price)
My command line to create the pickle file:
run_algo.py -f E:\..\Main.py --start 2011-1-1 --end 2015-1-1 -o buyapple_out.pickle
Command line output:
[2015-03-27 10:18:20.809959] WARNING: Loader: No benchmark data found for date range.
start_date=2015-03-27 00:00:00+00:00, end_date=2015-03-27 10:18:19.973911, url=http://ichart.finance.yahoo.com/table.csv?a=2&s=%5EGSPC&b=27&e=27&d=2&g
=d&f=2015&c=2015
[2015-03-27 10:20:05.811965] INFO: Performance: Simulated 504 trading days out of 504.
[2015-03-27 10:20:05.811965] INFO: Performance: first open: 2013-01-02 14:31:00+00:00
[2015-03-27 10:20:05.811965] INFO: Performance: last close: 2014-12-31 21:00:00+00:00
My pickle file is created correctly, but Zipline appears to still be using Yahoo instead of my CSV, because the command line output refers to Yahoo Finance. There seems to be no documentation from Zipline on how to do this, other than 'load the csv into a dataframe'. What else do I need to do?
Many thanks.
I believe your issue is in how you are using the data, not in how you are reading it in. Here's a sample strategy using Bollinger Bands.
import pandas as pd
import locale
import matplotlib.pyplot as plt
from zipline.algorithm import TradingAlgorithm
import zipline.finance.trading as trading
from zipline.utils.factory import load_from_yahoo
from datetime import datetime, date
from pytz import timezone
import pytz
from dateutil.tz import tzutc
from dateutil.parser import parse
from zipline.finance.slippage import FixedSlippage
from zipline.finance.commission import PerShare, PerTrade
# US Central time zone object (pytz).
central = timezone('US/Central')

# Holding-period limit in bars. The only use in this file is the
# commented-out HOLDTIME condition in BollingerBands.handle_data.
HOLDTIME = 5

# 'en_US.UTF-8' is not installed on every system (for example many Windows
# setups and minimal containers); setlocale raises locale.Error there, which
# would abort the script at import time.
try:
    locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
except locale.Error:
    # Keep the interpreter's current locale rather than failing.
    pass

# Multiplied by the trade size in BollingerBands.initialize to get the
# per-trade commission cost.
COMMISSION = 0.005
def date_utc(s):
    """Parse a date/time string and return a timezone-aware UTC datetime.

    A string without time zone information is interpreted as UTC. A string
    that carries an offset or a recognised zone is converted to UTC, so the
    instant it describes is preserved.

    The previous implementation passed the ``tzutc`` class as ``tzinfos``.
    That argument is meant to be a mapping of zone names or a callable taking
    ``(tzname, tzoffset)``; ``tzutc`` accepts no such arguments, so the time
    zone is now applied explicitly after parsing instead.

    :param s: date/time string understood by ``dateutil.parser.parse``.
    :returns: ``datetime`` whose ``tzinfo`` is ``tzutc()``.
    :raises ValueError: propagated from ``parse`` for unparseable input.
    """
    parsed = parse(s)
    if parsed.tzinfo is None:
        # Naive result: label it as UTC without shifting the clock time.
        return parsed.replace(tzinfo=tzutc())
    # Aware result: shift to UTC so the instant is unchanged.
    return parsed.astimezone(tzutc())
class BollingerBands(TradingAlgorithm):
    """Band-breakout strategy driven by named columns of the input data.

    ``handle_data`` reads the 'Open', 'Close', 'upper' and 'lower' entries of
    the per-bar data by name and places every order against 'Close'.
    A position is opened when the close touches or crosses a band and is
    closed when the close reaches one of two exit levels derived from the
    entry bar's open.
    """

    def initialize(self):
        """Set the initial position state, exit thresholds and cost model."""
        # Order size and exit distances (fractions of the entry bar's open).
        self.trade_size = 1000
        self.pct_stop = 0.025
        self.target = 0.05

        # Position bookkeeping.
        self.invested = False
        self.long = False
        self.short = False
        self.closed = False
        self.trading_day_counter = 0

        # Lower and upper exit levels; overwritten on every entry.
        self.long_stop_price = 0.0
        self.short_stop_price = 0.0

        self.set_slippage(FixedSlippage(spread=0.10))
        self.set_commission(PerTrade(cost=self.trade_size * COMMISSION))

    def handle_data(self, data):
        """Process one bar: maybe enter, maybe exit, then record state."""
        close_px = data['Close'].price
        upper_band = data['upper'].price
        lower_band = data['lower'].price

        if not self.invested:
            if close_px >= upper_band:
                # Close at or above the upper band: go long.
                self.order('Close', self.trade_size)
                open_px = data['Open'].price
                # Stop below the open, target above it.
                self.long_stop_price = open_px - open_px * float(self.pct_stop)
                self.short_stop_price = open_px + open_px * float(self.target)
                self.long = True
                self.closed = False
                self.invested = True
                self.trading_day_counter = 0
            elif close_px <= lower_band:
                # Close at or below the lower band: go short.
                open_px = data['Open'].price
                # Stop above the open, target below it.
                self.short_stop_price = open_px + open_px * float(self.pct_stop)
                self.long_stop_price = open_px - open_px * float(self.target)
                self.order('Close', -self.trade_size)
                self.short = True
                self.closed = False
                self.invested = True
                self.trading_day_counter = 0

        # Exit once the close leaves the [long_stop_price, short_stop_price]
        # corridor. NOTE: the HOLDTIME-based exit
        # (self.trading_day_counter == HOLDTIME) is disabled.
        exit_level_hit = self.invested and (
            close_px <= self.long_stop_price
            or close_px >= self.short_stop_price
        )
        if exit_level_hit:
            if self.long:
                self.order('Close', -self.trade_size)
            if self.short:
                self.order('Close', self.trade_size)
            self.closed = True
            self.long = False
            self.short = False
            self.invested = False

        # Incremented on every bar; reset to zero only when a position opens.
        self.trading_day_counter += 1

        self.record(
            Close=close_px,
            upper=upper_band,
            lower=lower_band,
            long=self.long,
            short=self.short,
            holdtime=self.trading_day_counter,
            closed_position=self.closed,
            shares=self.trade_size,
        )
if __name__ == '__main__':
    # SPY.csv contains Date, Open, High, Low, Close, Volume.
    # Index the frame by parsed dates instead of the default 0..N-1 integer
    # index: the index is what timestamps each bar handed to the strategy.
    # NOTE(review): Zipline's DataFrame source is assumed to require a
    # tz-aware UTC DatetimeIndex - confirm against your Zipline version.
    df = pd.read_csv('SPY.csv', index_col='Date', parse_dates=True)

    # Downloaded price files are often newest-first; the rolling statistics
    # below and the simulation both need chronological order.
    df = df.sort_index()

    if df.index.tz is None:
        df.index = df.index.tz_localize('UTC')
    else:
        df.index = df.index.tz_convert('UTC')

    # 21-bar moving average and standard deviation of the close, and the
    # bands two standard deviations either side of the average.
    df['avg'] = pd.rolling_mean(df['Close'], 21)
    df['std'] = pd.rolling_std(df['Close'], 21)
    df['upper'] = df['avg'] + 2 * df['std']
    df['lower'] = df['avg'] - 2 * df['std']
    # The first 20 rows have no rolling statistics yet; drop them.
    df = df.dropna()

    # Build the strategy and run it over the prepared frame.
    strategy = BollingerBands()
    results = strategy.run(df)

    # Compound the per-period returns into a cumulative growth factor.
    results['algorithm_returns'] = (1 + results.returns).cumprod()
    results.to_csv('output.csv')

    # Final growth factor as a percentage. .iloc[-1] selects the last row by
    # position, independent of the index type.
    print(results['algorithm_returns'].iloc[-1] * 100)