A feature of some NumPy functions is that they work on both NumPy arrays and pandas Series:
# The same three values, once as a bare array and once as a labelled Series.
my_array = np.array([10, 20, 30])
my_series = pd.Series(
    [10, 20, 30],
    index=[2000, 2001, 2002],
    name='My series',
)

# np.cumsum accepts either container; the Series result is printed first.
print(np.cumsum(my_series))
print(np.cumsum(my_array))
Output:
2000    10
2001    30
2002    60
Name: My series, dtype: int64
[10 30 60]
How is this achieved and can I write my own functions in a way that does this?
As an example, let's say I have a Python function that takes an array and returns an array of the same length:
def my_func(x):
    """Return the running (cumulative) sum of ``x`` along its first axis.

    Parameters
    ----------
    x : array-like
        A NumPy array, a list, or anything else ``np.asanyarray`` can
        convert (for example a pandas Series).

    Returns
    -------
    numpy.ndarray
        An array with the same shape and dtype as the array form of ``x``;
        entry ``i`` holds ``x[0] + ... + x[i]``.
    """
    # Work on an array view so that iteration is always positional. Indexing
    # the input with ``x[i]`` would be a *label* lookup on a pandas Series and
    # fail for an index such as 2000, 2001, 2002.
    values = np.asanyarray(x)
    a = np.empty_like(values)
    b = 0
    for i, value in enumerate(values):
        b += value
        a[i] = b
    return a
How can I generalize it so that it either returns an array or a Pandas series/dataframe if one was passed?
I realize I could do the following, but I suspect this is not how it is done in the NumPy example above:
def my_func_for_array_or_series(x):
    """Apply ``my_func`` to ``x`` and return a result of the same kind as ``x``.

    If ``x`` exposes both ``.values`` and ``.copy()`` (as pandas objects do),
    ``my_func`` runs on ``x.values`` and the result is written into a copy of
    ``x``, which is returned. Otherwise ``x`` is passed straight to
    ``my_func`` and its return value is returned unchanged.
    """
    # Only the attribute probing is guarded. An AttributeError raised while
    # my_func runs, or during the slice assignment, must propagate instead of
    # silently triggering a second call on the raw input.
    try:
        values = x.values  # probed first so a plain ndarray is never copied
        a = x.copy()
    except AttributeError:
        return my_func(x)
    a[:] = my_func(values)
    return a
Here's a workaround utilizing a decorator.
import numpy as np
import pandas as pd
import functools
def apply_to_pandas(func):
    """Decorate ``func`` so that its result comes back in the input's container.

    The wrapper copies its first argument ``x``, calls
    ``np.apply_along_axis(func, 0, x, *args, **kwargs)`` so that ``func``
    receives 1-D slices taken along axis 0, writes the computed values into
    the copy with a full-slice assignment, and returns that copy.
    """
    @functools.wraps(func)
    def inner(x, *args, **kwargs):
        # The copy supplies the container (and, for pandas objects, the
        # index/columns/name) that the computed values are written into.
        container = x.copy()
        computed = np.apply_along_axis(func, 0, x, *args, **kwargs)
        container[:] = computed
        return container

    return inner
@apply_to_pandas
def my_func(x):
    """Running total of ``x``: entry ``i`` of the result is ``x[0] + ... + x[i]``."""
    running = np.empty_like(x)
    total = 0
    for pos in range(len(x)):
        total += x[pos]
        running[pos] = total
    return running
# Test: run the decorated function on an array, a Series and a DataFrame.
my_series = pd.Series(
    [10, 20, 30],
    index=[2000, 2001, 2002],
    name='My series',
)
my_array = np.array([10, 20, 30])
# Three copies of the same Series side by side as columns.
my_df = pd.concat([my_series, my_series, my_series], axis=1)

for item in (my_array, my_series, my_df):
    print(my_func(item), end='\n\n')
Output:
[10 30 60]
2000    10
2001    30
2002    60
Name: My series, dtype: int64
      My series  My series  My series
2000         10         10         10
2001         30         30         30
2002         60         60         60
Here is a more verbose but more efficient version of the decorator that avoids making an unnecessary copy of the data:
def apply_to_pandas(func):
    """Decorate ``func`` so that pandas inputs come back as pandas objects.

    The wrapper dispatches on the type of its first argument ``x``:

    * ``np.ndarray`` or ``list``: ``func(x, *args, **kwargs)`` is called
      directly and its return value is returned as is, with no copy. Note
      that ``func`` receives the whole input here, not one column at a time.
    * anything else (e.g. a pandas Series or DataFrame): ``x`` is copied,
      ``func`` is applied to 1-D slices along axis 0 via
      ``np.apply_along_axis``, and the values are written into the copy,
      which keeps the labels of ``x``.

    The input ``x`` is never modified.
    """
    @functools.wraps(func)
    def wrapper_func(x, *args, **kwargs):
        if isinstance(x, (np.ndarray, list)):
            return func(x, *args, **kwargs)
        # A real copy is required. A shallow copy (``deep=False``) may share
        # its data with ``x``, in which case the slice assignment below would
        # overwrite the caller's object as well.
        out = x.copy()
        out[:] = np.apply_along_axis(func, 0, x, *args, **kwargs)
        return out
    return wrapper_func