Search code examples
Tags: rust, rust-polars

What is the idiomatic way to operate on a single Series within a DataFrame in rust polars?


I am a beginner and want to understand if my code is idiomatic polars rust.

I have written the following function:

/// Converts a column of size strings (a number, whitespace, then a unit such
/// as `KB` or `TB`) into terabytes using decimal (powers of 1000) scaling.
///
/// Each value is rendered with `to_string`, commas and double quotes are
/// removed, and the remainder is split on whitespace into a number and a unit.
///
/// # Panics
///
/// Panics if a value has no number or unit token, if the number does not
/// parse as `f64`, or if the unit is not one of `KB`, `MB`, `GB`, `TB`, `PB`.
fn transform_str_to_tb(s: Series) -> Series {
    // Scales `amount`, expressed in `unit`, to terabytes.
    let to_tb = |amount: f64, unit: &str| -> f64 {
        match unit {
            "KB" => amount / (1000.0 * 1000.0 * 1000.0),
            "MB" => amount / (1000.0 * 1000.0),
            "GB" => amount / 1000.0,
            "TB" => amount,
            "PB" => amount * 1000.0,
            _ => panic!("Unsupported unit: {}", unit),
        }
    };

    s.iter()
        .map(|cell| {
            // Strip thousands separators and quote characters before tokenizing.
            let text = cell.to_string().replace([',', '\"'], "");
            let mut tokens = text.split_whitespace();
            let amount: f64 = tokens.next().unwrap().parse().unwrap();
            let unit = tokens.next().unwrap();
            to_tb(amount, unit)
        })
        .collect()
}

I then call it when operating on a DataFrame as follows:

// Add a "value_tb" column computed from "value" by `transform_str_to_tb`.
// The closure returns floats, so the output is declared with
// `GetOutput::float_type()`; `GetOutput::default()` would report the input
// column's dtype to the lazy planner instead.
let df = df
    .lazy()
    .with_columns([col("value")
        .map(
            |s| Ok(Some(transform_str_to_tb(s))),
            GetOutput::float_type(),
        )
        .alias("value_tb")])
    .collect()?;

Any recommendations for how this can be improved?


Solution

  • You definitely don't want to use Series::iter here: it yields every item as a dynamically typed AnyValue, which your code then converts to a String through its Display impl, allocating once per row. Instead, access the underlying string array directly, with something like this:

    fn transform_str_to_tb(s: Series) -> PolarsResult<Option<Series>> {
        Ok(Some(
            s.str()?  // fallibly get underlying StringChunked
                .iter()
                .map(|x| {
                    let mut parts = x?.split_whitespace();
                    let num = parts.next()?.parse::<f64>().unwrap();
                    let unit = parts.next()?;
                    let multiplier = match unit {
                        "KB" => 1.0 / (1000.0 * 1000.0 * 1000.0),
                        "MB" => 1.0 / (1000.0 * 1000.0),
                        "GB" => 1.0 / 1000.0,
                        "TB" => 1.0,
                        "PB" => 1.0 * 1000.0,
                        _ => panic!("Unsupported unit: {}", unit),
                    };
    
                    Some(num * multiplier)
                })
                .collect::<Float64Chunked>()
                .into_series(),
        ))
    }
    
    // Expression that rewrites "value" through `transform_str_to_tb`, declaring
    // a float output type for the lazy plan.
    let value_tb = col("value").map(transform_str_to_tb, GetOutput::float_type());

    // Build a two-row example frame and attach the expression. The pipeline
    // stays lazy: nothing in this snippet calls `collect`.
    let df = df!("value" => ["1 KB", "2 TB"])?
        .lazy()
        .with_column(value_tb);