I want to use Ibis to return the mean and standard deviation of each floating point numeric column in a table. How do I do that?
Here's a standalone example that uses Ibis to return the mean and standard deviation of each floating point column in the iris flower dataset. This uses Ibis selectors:
import ibis
from ibis import _
import ibis.selectors as s
import ibis.expr.datatypes as dt
import seaborn as sns
iris = ibis.memtable(sns.load_dataset("iris"))
iris.aggregate(
s.across(
s.of_type(dt.float()),
dict(mean=ibis._.mean(), std=ibis._.std())
)
)
This returns:
┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ sepal_length_mean ┃ sepal_width_mean ┃ petal_length_mean ┃ petal_width_mean ┃ sepal_length_std ┃ sepal_width_std ┃ petal_length_std ┃ petal_width_std ┃
┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ float64 │ float64 │ float64 │ float64 │ float64 │ float64 │ float64 │ float64 │
├───────────────────┼──────────────────┼───────────────────┼──────────────────┼──────────────────┼─────────────────┼──────────────────┼─────────────────┤
│ 5.843333 │ 3.057333 │ 3.758 │ 1.199333 │ 0.828066 │ 0.435866 │ 1.765298 │ 0.762238 │
└───────────────────┴──────────────────┴───────────────────┴──────────────────┴──────────────────┴─────────────────┴──────────────────┴─────────────────┘
You can replace s.of_type(dt.float())
with s.numeric()
to apply this to all numeric columns including integers. Or you can use other members of dt
to refer to other column types.