Search code examples
Tags: python, algorithmic-trading, pyalgotrade

How to convert these functions to act on the entire dataframe and speed up my python code


In an attempt to backtest the much-discussed trading approach known as the Smart Money Concept, I made a Python class with a few functions.

The mistake I made was to have each function operate only on the last candle/row and return a result for that row alone. Backtesting six months' worth of data this way means looping over the dataframe and feeding an ever-growing slice to these functions row by row, which takes a very long time.

I require assistance with:

Converting the public functions/methods to act on the entire dataframe via vectorization and return the entire dataframe.

is_uptrend(), has_bull_choch()


Below is the source code:

from scipy.ndimage import  maximum_filter1d, minimum_filter1d
from scipy.signal import find_peaks
from scipy import stats

import numpy as np
import pandas as pd

class SmartMoney:
    """Pivot-based helpers for back-testing the Smart Money Concept.

    The class flags support/resistance pivots on an OHLC dataframe (columns
    ``high`` and ``low`` are required) and answers trend / Change-of-Character
    questions about the *latest* state of that dataframe.

    Args:
        filter_size: Window passed to the min/max smoothing filters before
            peak detection. ``0`` (the default) disables smoothing.
    """

    def __init__(self, filter_size: int = 0) -> None:
        # The pivot finders read this attribute; without it they would fail
        # with AttributeError on a freshly constructed instance.
        self.filter_size = filter_size

    def get_supports_and_resistances(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add 0/1 ``is_support`` and ``is_resistance`` columns to *df*.

        The dataframe is modified in place and also returned.
        """
        df['is_support'] = 0
        df['is_resistance'] = 0
        df = self._get_resistances(df=df)
        df = self._get_supports(df=df)

        return df

    @staticmethod
    def _flag_rows(df: pd.DataFrame, column: str, positions: np.ndarray) -> None:
        """Set *column* to 1 at the given row *positions* (not labels)."""
        if column not in df.columns:
            df[column] = 0
        if len(positions) > 0:
            # find_peaks reports positions, so write positionally; a
            # label-based write only works on a default 0..n-1 index.
            df.iloc[positions, df.columns.get_loc(column)] = 1

    # Get support zones
    def _get_supports(self, df: pd.DataFrame) -> pd.DataFrame:
        """Flag local minima of ``low`` as supports."""
        if len(df) < 1:
            return df

        lows = df['low'].to_numpy()
        if self.filter_size > 0:
            lows = minimum_filter1d(lows, self.filter_size)
        # Minima of the lows are peaks of the negated series.
        minimas, _ = find_peaks(x=-lows, prominence=self.look_back(df=df))

        self._flag_rows(df, 'is_support', minimas)
        return df

    # Get resistances zones
    def _get_resistances(self, df: pd.DataFrame) -> pd.DataFrame:
        """Flag local maxima of ``high`` as resistances."""
        if len(df) < 1:
            return df

        highs = df['high'].to_numpy()
        if self.filter_size > 0:
            highs = maximum_filter1d(highs, self.filter_size)
        maximas, _ = find_peaks(highs, prominence=self.look_back(df=df))

        self._flag_rows(df, 'is_resistance', maximas)
        return df

    def look_back(self, df: pd.DataFrame) -> int:
        """Return the rounded mean candle range (``high - low``).

        The value is used as the ``prominence`` threshold for peak detection.
        NOTE(review): for instruments whose mean range is below 0.5 this
        rounds to 0, which makes every local extremum qualify.
        """
        return int(round(float(np.mean(df['high'] - df['low']))))

    @staticmethod
    def _last_two_pivots(df: pd.DataFrame, flag_column: str, price_column: str) -> tuple:
        """Return ``(previous, latest)`` prices of the rows flagged in *flag_column*."""
        pivots = df.loc[df[flag_column] == 1, price_column]
        return pivots.iloc[-2], pivots.iloc[-1]

    def is_uptrend(self, df: pd.DataFrame) -> bool:
        """Return True when the latest resistance and support are both higher than the previous ones."""
        if not self._meets_requirement(df=df):
            return False
        prev_high, last_high = self._last_two_pivots(df, 'is_resistance', 'high')
        prev_low, last_low = self._last_two_pivots(df, 'is_support', 'low')
        return bool(last_high > prev_high and last_low > prev_low)

    def is_downtrend(self, df: pd.DataFrame) -> bool:
        """Return True when the latest resistance and support are both lower than the previous ones."""
        if not self._meets_requirement(df=df):
            return False
        prev_high, last_high = self._last_two_pivots(df, 'is_resistance', 'high')
        prev_low, last_low = self._last_two_pivots(df, 'is_support', 'low')
        return bool(last_high < prev_high and last_low < prev_low)

    def _meets_requirement(self, df: pd.DataFrame, minimum_required: int = 2) -> bool:
        """Return True when *df* has at least *minimum_required* supports and resistances."""
        resistance_count = int((df['is_resistance'] == 1).sum())
        support_count = int((df['is_support'] == 1).sum())
        return resistance_count >= minimum_required and support_count >= minimum_required

    # Check if there's Change of Character (as per Smart Money Concept)
    def has_bull_choch(self, df: pd.DataFrame, in_pullback_phase = False, with_first_impulse = False) -> bool:
        """Return True when price after the lowest low broke the last prior resistance.

        The dataframe is split at its lowest ``low``. A bullish Change of
        Character is reported when any ``high`` at or after that row exceeds
        the last resistance flagged before it.

        Args:
            df: Dataframe with ``high``, ``low`` and ``is_resistance`` columns.
            in_pullback_phase: Additionally require that the latest ``high``
                does not exceed the last resistance formed after the low.
            with_first_impulse: Additionally require that the first resistance
                formed after the low is not below the last one before it.

        Returns:
            False whenever a requested check cannot be evaluated because no
            resistance has formed after the low yet.
        """
        if df[df['is_resistance'] == 1].empty:
            return False

        left, right = self._get_left_and_right(df=df, divide_by_high=False)

        left_resistance_highs = left.loc[left['is_resistance'] == 1, 'high']
        if len(left_resistance_highs) < 1 or right.shape[0] < 1:
            return False

        broken_level = left_resistance_highs.iloc[-1]
        right_resistance_highs = right.loc[right['is_resistance'] == 1, 'high']

        # Both optional checks need a resistance on the right-hand side;
        # without one they cannot be confirmed, so report "no signal"
        # instead of failing on an empty selection.
        if (with_first_impulse or in_pullback_phase) and right_resistance_highs.empty:
            return False

        # if we only want CHoCH that broke on first impulse move
        if with_first_impulse and broken_level > right_resistance_highs.iloc[0]:
            return False

        # if we want CHoCH in pullback phase
        if in_pullback_phase and right_resistance_highs.iloc[-1] < right['high'].iloc[-1]:
            return False

        return bool((right['high'] > broken_level).any())

    def _get_left_and_right(self, df: pd.DataFrame, divide_by_high = True) -> tuple[pd.DataFrame, pd.DataFrame]:
        """Split *df* at its highest ``high`` (or lowest ``low``).

        Returns:
            ``(left, right)`` where ``left`` holds the rows strictly before the
            extreme row and ``right`` holds the extreme row and everything
            after it. Both frames are copies with a fresh 0..n-1 index.
        """
        # Use the row *position* of the extreme so the split is correct for
        # any index type, not only a default integer index.
        if divide_by_high:
            off_set = int(df['high'].argmax())
        else:
            off_set = int(df['low'].argmin())

        # reset_index returns new frames, so the caller's dataframe is never
        # touched and no chained-assignment warning is triggered.
        left = df.iloc[:off_set].reset_index(drop=True)
        right = df.iloc[off_set:].reset_index(drop=True)

        return left, right

Test Data:

# The alias must match the name used for the download call below.
import yfinance as yf

ticker_symbol = "BTC-USD"

start_date = "2023-06-01"
end_date = "2023-12-31"

# Fetch the candles for the requested window.
bitcoin_data = yf.download(ticker_symbol, start=start_date, end=end_date)

# Reset the index to make the date a regular column
df = bitcoin_data.reset_index()

# Lower-case the column names so they match what SmartMoney reads
# ('high', 'low', ...). Keys that are absent from the frame are ignored.
df.rename(
    columns={
        'Date': 'time',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Adj Close': 'adj close',
        'Volume': 'volume',
    },
    inplace=True,
)

This is how I would like the code to work:

# Desired usage: every call hands back the dataframe with one extra 0/1
# column named after the method that produced it.
from smart_money import SmartMoney
sm = SmartMoney()

# Get minimas and maximas (support and resistance)
df = sm.get_supports_and_resistances(df=df)
# Wanted: adds an 'is_uptrend' column.
df = sm.is_uptrend(df=df)
# Wanted: adds a 'has_bull_choch' column.
df = sm.has_bull_choch(df=df)

Remember, the objective is to have these functions return a DataFrame with a new column (the column name should be the function name) whose value is either 1 or 0.

time open high low close volume
324 2023-11-28 37242.70 38377.00 36868.41 37818.87
325 2023-11-29 37818.88 38450.00 37570.00 37854.64
326 2023-11-30 37854.65 38145.85 37500.00 37723.96
327 2023-12-01 37723.97 38999.00 37615.86 38682.52
328 2023-12-02 38682.51 38821.59 38641.61 38774.95

Solution

  • This is what I eventually accomplished after much effort.

    I will try to work on other functions as well.


    Also compared execution time (From 2020-01-01 to 2023-12-31):

    Explicit Loop:

    CPU times: user 2.15 s, sys: 7.77 ms, total: 2.16 s Wall time: 2.17 s

    Vectorized Code:

    CPU times: user 3.79 ms, sys: 0 ns, total: 3.79 ms Wall time: 3.58 ms

    I will also check how execution time is affected when running on lower timeframe like 1h, 30m and 15m

    def is_uptrend(self, data: pd.DataFrame) -> pd.DataFrame:
        """Add a 0/1 ``is_uptrend`` column computed for every row at once.

        A row is flagged 1 when, looking only at pivots at or before that
        row, the latest resistance high is above the previous resistance
        high and the latest support low is above the previous support low.
        Rows with fewer than two resistances or two supports so far get 0.

        Args:
            data: Dataframe with ``is_resistance`` and ``is_support`` flag
                columns plus price columns named ``high``/``low`` (the
                capitalised ``High``/``Low`` spelling is accepted as well).

        Returns:
            The same dataframe, modified in place, with ``is_uptrend`` added.

        NOTE(review): the pivot flags are taken as given. If they were
        computed on the full history, a pivot is visible on its own row
        even though later candles were needed to confirm it.
        """

        def latest_pivot_is_higher(prices: np.ndarray, is_pivot: np.ndarray) -> np.ndarray:
            # Per row: is the most recent pivot above the one before it?
            result = np.zeros(len(prices), dtype=bool)
            pivot_positions = np.flatnonzero(is_pivot)
            if pivot_positions.size < 2:
                return result

            # Compare each pivot with its predecessor; the first has none.
            higher_than_previous = np.zeros(pivot_positions.size, dtype=bool)
            higher_than_previous[1:] = (
                prices[pivot_positions[1:]] > prices[pivot_positions[:-1]]
            )

            # The running pivot count maps every row to its latest pivot.
            pivots_seen = np.cumsum(is_pivot)
            has_pivot = pivots_seen > 0
            result[has_pivot] = higher_than_previous[pivots_seen[has_pivot] - 1]
            return result

        # Prefer the lowercase names used elsewhere in this file, but keep
        # working with frames that still carry the capitalised names.
        high_column = 'high' if 'high' in data.columns else 'High'
        low_column = 'low' if 'low' in data.columns else 'Low'

        high_condition = latest_pivot_is_higher(
            data[high_column].to_numpy(),
            (data['is_resistance'] == 1).to_numpy(),
        )
        low_condition = latest_pivot_is_higher(
            data[low_column].to_numpy(),
            (data['is_support'] == 1).to_numpy(),
        )

        data['is_uptrend'] = (high_condition & low_condition).astype(int)

        return data