Search code examples
python pandas rolling-computation

non fixed rolling window


I am looking to implement a rolling window on a list, but instead of a fixed window length, I would like to provide a second list that gives the window length to use at each position.
Something like this:

# l1 holds the values; l2 holds the window length to use at each position.
l1 = [5, 3, 8, 2, 10, 12, 13, 15, 22, 28]
l2 = [1, 2, 2, 2, 3, 4, 2, 3, 5, 3]
get_custom_roling(l1, l2, np.average)

and the result would be:

[5, 4, 5.5, 5, 6.67, ....]

6.67 is calculated as the average of the 3 elements 10, 2 and 8 (the element at that position and the two before it).

I implemented a slow solution, and any idea to make it quicker is welcome :)

import numpy as np



def get_the_list(end_point, number_points):
    """Return the positions of a window of ``number_points`` ending at ``end_point``.

    Positions are listed from ``end_point`` backwards. A window that would
    reach before position 0 is truncated there, so no negative position is
    ever returned.

    example: get_the_list(6, 3) ==> [6, 5, 4]
    example: get_the_list(9, 5) ==> [9, 8, 7, 6, 5]
    example: get_the_list(1, 5) ==> [1, 0]

    Returns an empty list when ``number_points`` is NaN.
    """
    if np.isnan(number_points):
        return []
    number_points = int(number_points)
    # range() excludes its stop value, so stopping at -1 at the latest makes
    # position 0 the earliest one that can be produced.
    stop = max(end_point - number_points, -1)
    return list(range(end_point, stop, -1))

def get_idx(s):
    """Return a generator of window position lists, one per element of ``s``.

    Each element of ``s`` is paired with its position, and the pair is handed
    to ``get_the_list(position, value)``. The pairs are collected up front;
    the position lists themselves are produced lazily as the generator is
    consumed.
    """
    positioned_sizes = list(enumerate(s))
    return (
        get_the_list(position, size)
        for position, size in positioned_sizes
    )

def get_custom_roling(s, ss, funct):
    """Aggregate ``s`` over trailing windows whose length varies per position.

    s: the values to aggregate, as a pandas Series or any list-like.
    ss: one window length per position of ``s``; it is passed to ``get_idx``
        to obtain the positions that make up each window.
    funct: callable applied to each window of values (e.g. ``np.average``).

    Returns a pandas Series holding ``funct`` of every window, indexed like
    ``s`` (a default integer index when ``s`` is not a Series).
    """
    # Imported here because this snippet only imports numpy at the top; the
    # original referred to an undefined ``eiu.pd``.
    import pandas as pd

    # A plain list cannot be indexed with a list of positions and has no
    # ``.index``, so wrap anything that is not already a Series.
    series = s if isinstance(s, pd.Series) else pd.Series(s)

    results = []
    for positions in get_idx(ss):
        # Windows are defined by position, so select with ``iloc``. Positions
        # before the start of the data are dropped instead of being allowed
        # to wrap around to the end.
        window = series.iloc[[pos for pos in positions if pos >= 0]]
        results.append(funct(window))

    return pd.Series(data=results, index=series.index)


Solution

  • Pandas supports custom rolling-window indexers, which let you use a different window size for every row.

    Simple explanation: the start and end arrays hold the index bounds used to slice your data, so the window for row i is data[start[i]:end[i]].

    #start = [0  0  1  2  2  2  5  5  4  7]
    #end =   [1  2  3  4  5  6  7  8  9 10]
    

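    The arguments passed to get_window_bounds are supplied by pandas when the rolling operation runs; keyword arguments given to the indexer's constructor (custom_name_whatever below) are available inside the method as attributes of self.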

    import pandas as pd
    import numpy as np
    from pandas.api.indexers import BaseIndexer
    from typing import Optional, Tuple
    
    
    class CustomIndexer(BaseIndexer):
        """Rolling-window indexer with a separate trailing window length per row.

        The window lengths are read from ``self.custom_name_whatever``, which
        is supplied as a keyword argument when the indexer is constructed.
        """

        def get_window_bounds(self,
                              num_values: int = 0,
                              min_periods: Optional[int] = None,
                              center: Optional[bool] = None,
                              closed: Optional[str] = None,
                              step: Optional[int] = None,
                              ) -> Tuple[np.ndarray, np.ndarray]:
            """Return the ``(start, end)`` bounds of every window.

            Window ``i`` covers ``data[start[i]:end[i]]``: it ends at row ``i``
            (inclusive) and reaches back ``custom_name_whatever[i]`` rows.

            ``min_periods``, ``center`` and ``closed`` are accepted because
            pandas passes them, but they are not used here. ``step`` is part of
            the signature pandas >= 1.5 expects; when given, only every
            ``step``-th window is returned.
            """
            end = np.arange(1, num_values + 1, dtype=np.int64)
            lengths = np.asarray(self.custom_name_whatever, dtype=np.int64)
            # Clamp at 0 so a window longer than the rows available so far is
            # truncated instead of producing a negative start bound.
            start = np.maximum(end - lengths, 0)
            if step is not None:
                start, end = start[::step], end[::step]
            return start, end
    
    # Sample data: the values to average in "l1", the window length for each
    # row in "l2".
    df = pd.DataFrame(
        {
            "l1": [5, 3, 8, 2, 10, 12, 13, 15, 22, 28],
            "l2": [1, 2, 2, 2, 3, 4, 2, 3, 5, 3],
        }
    )

    # The per-row window lengths are handed over by keyword; get_window_bounds
    # reads them back as self.custom_name_whatever.
    indexer = CustomIndexer(custom_name_whatever=df["l2"])

    # Rolling mean of "l1" using the variable-length windows.
    df["variable_mean"] = df["l1"].rolling(indexer).mean()

    print(df)
    

    Outputs:

       l1  l2  variable_mean
    0   5   1       5.000000
    1   3   2       4.000000
    2   8   2       5.500000
    3   2   2       5.000000
    4  10   3       6.666667
    5  12   4       8.000000
    6  13   2      12.500000
    7  15   3      13.333333
    8  22   5      14.400000
    9  28   3      21.666667