Is it possible to do something like the following in Python Polars?
import polars as pl
import statsmodels.api as sm
lowess = sm.nonparametric.lowess
# Example data: `x` is the grouping key, `y` and `z` are the two numeric
# columns that are later passed together to LOWESS.
group_keys = pl.Series('x', ['a', 'a', 'a', 'b', 'b', 'b'])
y_values = pl.Series('y', [1, 2, 3, 1, 2, 3])
z_values = pl.Series('z', [.2, .3, .5, .1, .3, .7])
df = pl.DataFrame([group_keys, y_values, z_values])
# Attempt: pack `z` and `y` into one struct column and run LOWESS on it
# separately for each group of `x`.
# This fails with the error shown below: `cols['z']` indexes the value handed
# to the lambda with a string key, which is rejected.
df.with_columns(
pl.struct('z', 'y').map_batches(lambda cols: pl.DataFrame(lowess(cols['z'], cols['y'], frac = .1)))
.over('x')
)
# ComputeError: TypeError: cannot select elements using Sequence with elements of type 'str'
I want to group by one or more columns and then apply a function that takes more than one argument.
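To reach a specific field of a struct, you can use the `.struct.field()` method. The function passed to `map_batches` receives the struct column as a single `pl.Series`, so each field has to be extracted from it explicitly: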
def get_lowess(s: pl.Series) -> pl.Series:
    """Apply LOWESS to one batch of the ``z``/``y`` struct column.

    Args:
        s: Struct-typed Series carrying the fields ``"z"`` and ``"y"``.

    Returns:
        A Series built directly from the array that ``lowess`` returns.
        In the example output each row holds a two-element array.
    """
    # Pull the two fields out of the struct; ``z`` is passed as the first
    # positional argument of ``lowess`` and ``y`` as the second.
    z_field = s.struct.field("z")
    y_field = s.struct.field("y")
    smoothed = lowess(z_field, y_field, frac=.1)
    return pl.Series(smoothed)
# Build the per-group expression first: combine `z` and `y` into a struct,
# hand each `x` group to `get_lowess`, and name the result "lowess".
lowess_by_x = (
    pl.struct('z', 'y')
    .map_batches(get_lowess)
    .over('x')
    .alias("lowess")
)
df.with_columns(lowess_by_x)
shape: (6, 4)
┌─────┬─────┬─────┬───────────────┐
│ x ┆ y ┆ z ┆ lowess │
│ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ f64 ┆ array[f64, 2] │
╞═════╪═════╪═════╪═══════════════╡
│ a ┆ 1 ┆ 0.2 ┆ [1.0, 0.2] │
│ a ┆ 2 ┆ 0.3 ┆ [2.0, 0.3] │
│ a ┆ 3 ┆ 0.5 ┆ [3.0, 0.5] │
│ b ┆ 1 ┆ 0.1 ┆ [1.0, 0.1] │
│ b ┆ 2 ┆ 0.3 ┆ [2.0, 0.3] │
│ b ┆ 3 ┆ 0.7 ┆ [3.0, 0.7] │
└─────┴─────┴─────┴───────────────┘