I have a dataframe like
┌─────────────────────┬───────────┬───────────┬───────────┬───────────┬──────┐
│ ts ┆ 646150 ┆ 646151 ┆ 646154 ┆ 646153 ┆ week │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ datetime[μs] ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ i8 │
╞═════════════════════╪═══════════╪═══════════╪═══════════╪═══════════╪══════╡
│ 2024-02-01 00:00:00 ┆ 24.490348 ┆ 65.088941 ┆ 53.545259 ┆ 13.499832 ┆ 5 │
│ 2024-02-01 01:00:00 ┆ 15.054187 ┆ 63.095247 ┆ 60.786479 ┆ 29.538156 ┆ 5 │
│ 2024-02-01 02:00:00 ┆ 24.54212 ┆ 63.880298 ┆ 57.535928 ┆ 24.840966 ┆ 5 │
│ 2024-02-01 03:00:00 ┆ 24.85621 ┆ 69.778516 ┆ 67.57284 ┆ 24.672476 ┆ 5 │
│ 2024-02-01 04:00:00 ┆ 21.21628 ┆ 61.137849 ┆ 55.231299 ┆ 16.648383 ┆ 5 │
│ … ┆ … ┆ … ┆ … ┆ … ┆ … │
│ 2024-02-29 19:00:00 ┆ 23.17318 ┆ 62.590752 ┆ 72.026908 ┆ 24.614523 ┆ 9 │
│ 2024-02-29 20:00:00 ┆ 23.86416 ┆ 64.87102 ┆ 61.023656 ┆ 20.095353 ┆ 9 │
│ 2024-02-29 21:00:00 ┆ 18.553397 ┆ 67.530137 ┆ 63.477737 ┆ 17.313834 ┆ 9 │
│ 2024-02-29 22:00:00 ┆ 22.339175 ┆ 67.456563 ┆ 62.552035 ┆ 20.880844 ┆ 9 │
│ 2024-02-29 23:00:00 ┆ 15.5809 ┆ 66.774367 ┆ 57.066264 ┆ 29.529057 ┆ 9 │
└─────────────────────┴───────────┴───────────┴───────────┴───────────┴──────┘
which is generated as follows
from datetime import datetime, timedelta

import numpy as np
import polars as pl
def generate_test_data():
    """Build an hourly test DataFrame covering February 2024.

    Returns:
        A polars DataFrame with a ``ts`` datetime column (one row per hour)
        and four f64 sensor columns keyed by sensor id.
    """

    # Generator yielding one timestamp per hour, inclusive of end_date.
    def generate_hourly_timestamps(start_date, end_date):
        current = start_date
        while current <= end_date:
            yield current
            current += timedelta(hours=1)

    # Define the date range: all of February 2024 — a leap year,
    # so the month runs through the 29th at 23:00.
    start_date = datetime(2024, 2, 1)
    end_date = datetime(2024, 2, 29, 23, 0, 0)

    # Generate the data: one random reading per hour and sensor.
    timestamps = list(generate_hourly_timestamps(start_date, end_date))
    num_hours = len(timestamps)
    data = {
        "ts": timestamps,
        "646150": np.random.uniform(15, 25, num_hours),  # temperature between 15 and 25
        "646151": np.random.uniform(60, 70, num_hours),  # humidity between 60 and 70
        "646154": np.random.uniform(50, 75, num_hours),  # sensor data between 50 and 75
        "646153": np.random.uniform(10, 30, num_hours),  # sensor data between 10 and 30
    }
    df = pl.DataFrame(data)
    df = df.with_columns(pl.col("ts").cast(pl.Datetime))
    return df
# Build the test frame and tag every row with its ISO week number,
# so rows can later be grouped per week.
df = generate_test_data().with_columns(pl.col("ts").dt.week().alias("week"))
I would like to group by week (or some other time interval) and aggregate using min, mean, and max. For this, I could do something like
# Group by week and calculate min, max, and avg for every sensor column.
# NOTE: `DataFrame.groupby` was renamed to `group_by` in polars 0.19 and the
# old spelling has since been removed — use `group_by`.
aggregated_df = df.group_by("week").agg([
    pl.col("646150").min().alias("646150_min"),
    pl.col("646150").max().alias("646150_max"),
    pl.col("646150").mean().alias("646150_avg"),
    pl.col("646151").min().alias("646151_min"),
    pl.col("646151").max().alias("646151_max"),
    pl.col("646151").mean().alias("646151_avg"),
    pl.col("646154").min().alias("646154_min"),
    pl.col("646154").max().alias("646154_max"),
    pl.col("646154").mean().alias("646154_avg"),
    pl.col("646153").min().alias("646153_min"),
    pl.col("646153").max().alias("646153_max"),
    pl.col("646153").mean().alias("646153_avg")
])
but I would like to avoid specifying the column names.
I would like to generate a dataframe like the one below, where each column value is a list, a tuple, or some other multi-value format that holds the min, max, and avg values.
┌─────────────────────┬──────────────────┬──────────────────┐
│ week ┆ 646150 ┆ 646151 │
│ --- ┆ --- ┆ --- │
│ i8 ┆ List[f64] ┆ List[f64] │
╞═════════════════════╪══════════════════╪══════════════════╡
│ 5 ┆ [24.1,26.3,25.0] ┆ [22.1,23.3,22.5] │
│ … ┆ … ┆ … ┆
└─────────────────────┴──────────────────┴──────────────────┘
Is this possible in polars?
Thanks
You can do something like this to get struct-typed columns:
# One struct column per sensor, each holding {min, max, mean} for the week.
df.group_by("week").agg(
    [
        pl.struct(
            pl.col(name).min().alias("min"),
            pl.col(name).max().alias("max"),
            pl.col(name).mean().alias("mean"),
        ).alias(name)
        for name in df.columns
        if name not in {"week", "ts"}
    ]
)
┌──────┬───────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┐
│ week ┆ 646150 ┆ 646151 ┆ 646154 ┆ 646153 │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i8 ┆ struct[3] ┆ struct[3] ┆ struct[3] ┆ struct[3] │
╞══════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╪═══════════════════════════╡
│ 9 ┆ {15.044939,24.764209,20.0 ┆ {60.257012,69.928978,64.7 ┆ {50.530551,74.878361,63.0 ┆ {10.190688,29.82809,20.31 │
│ ┆ 59679… ┆ 75548… ┆ 78632… ┆ 305} │
│ 8 ┆ {15.102004,24.991653,20.0 ┆ {60.055959,69.92977,65.01 ┆ {50.048389,74.599839,61.2 ┆ {10.006159,29.938469,20.8 │
│ ┆ 30854… ┆ 5284} ┆ 97655… ┆ 86438… │
│ 5 ┆ {15.292633,24.75995,19.50 ┆ {60.068354,69.961624,64.4 ┆ {50.351197,74.665052,62.7 ┆ {10.128425,29.995835,20.5 │
│ ┆ 5275} ┆ 92186… ┆ 59774… ┆ 48913… │
│ 7 ┆ {15.054969,24.872284,20.1 ┆ {60.015595,69.978639,65.0 ┆ {50.264015,74.722436,61.2 ┆ {10.058671,29.906661,20.2 │
│ ┆ 02549… ┆ 56007… ┆ 16711… ┆ 89755… │
│ 6 ┆ {15.137601,24.97642,19.91 ┆ {60.118846,69.99577,65.32 ┆ {50.025932,74.968677,62.4 ┆ {10.003437,29.798739,19.7 │
│ ┆ 5258} ┆ 6988} ┆ 8508} ┆ 37678… │
└──────┴───────────────────────────┴───────────────────────────┴───────────────────────────┴───────────────────────────┘
Or, if you want to put it into a list:
# One list column per sensor, holding [min, max, mean] for the week.
df.group_by("week").agg(
    [
        pl.concat_list([agg(name) for agg in (pl.min, pl.max, pl.mean)])
        for name in df.columns
        if name not in {"week", "ts"}
    ]
)
┌──────┬────────────────────────┬────────────────────────┬────────────────────────┬─────────────────────────────────┐
│ week ┆ 646150 ┆ 646151 ┆ 646154 ┆ 646153 │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i8 ┆ list[f64] ┆ list[f64] ┆ list[f64] ┆ list[f64] │
╞══════╪════════════════════════╪════════════════════════╪════════════════════════╪═════════════════════════════════╡
│ 7 ┆ [15.054969, 24.872284, ┆ [60.015595, 69.978639, ┆ [50.264015, 74.722436, ┆ [10.058671, 29.906661, 20.2897… │
│ ┆ 20.1025… ┆ 65.0560… ┆ 61.2167… ┆ │
│ 6 ┆ [15.137601, 24.97642, ┆ [60.118846, 69.99577, ┆ [50.025932, 74.968677, ┆ [10.003437, 29.798739, 19.7376… │
│ ┆ 19.91525… ┆ 65.32698… ┆ 62.4850… ┆ │
│ 5 ┆ [15.292633, 24.75995, ┆ [60.068354, 69.961624, ┆ [50.351197, 74.665052, ┆ [10.128425, 29.995835, 20.5489… │
│ ┆ 19.50527… ┆ 64.4921… ┆ 62.7597… ┆ │
│ 9 ┆ [15.044939, 24.764209, ┆ [60.257012, 69.928978, ┆ [50.530551, 74.878361, ┆ [10.190688, 29.82809, 20.31305… │
│ ┆ 20.0596… ┆ 64.7755… ┆ 63.0786… ┆ │
│ 8 ┆ [15.102004, 24.991653, ┆ [60.055959, 69.92977, ┆ [50.048389, 74.599839, ┆ [10.006159, 29.938469, 20.8864… │
│ ┆ 20.0308… ┆ 65.01528… ┆ 61.2976… ┆ │
└──────┴────────────────────────┴────────────────────────┴────────────────────────┴─────────────────────────────────┘