Search code examples
Tags: dataframe, rust, python-polars, rust-polars

Combine different values of multiple columns into one column


I need help translating a Python example to Rust. The Python example was given here.

Here is the code snippet I am trying to get working:

use polars::prelude::*;

fn main() {
    // Assemble the two string columns of the example frame.
    let columns = vec![
        Series::new("Fruit", &["Apple", "Apple", "Pear"]),
        Series::new("Color", &["Red", "Yellow", "Green"]),
    ];

    let frame = DataFrame::new(columns).unwrap();

    // Switch to the lazy API; the expressions sketched below would be applied here.
    let df_lazy = frame.lazy();

    /*

    This is the PYTHON version I would like to recreate in RUST:

    df_lazy.with_columns([
                    # string fmt over multiple expressions
                    pl.format("{} has {} color", "Fruit", "Color").alias("fruit_list"),
                    # columnar lambda over multiple expressions
                    pl.map(["Fruit", "Color"], lambda s: s[0] + " has " + s[1] + " color" ).alias("fruit_list2"),
                    ])
     */
}

I can't even get a simple select to work?! Now I am lost.


Solution

  • Thanks to the comments from @cannero and @ritchie46 on the Polars issue tracker, I was able to make it work.

    This is a working version (Float64):

    use polars::prelude::*;
    
    /// Placeholder for an arbitrary computation over two floats.
    ///
    /// Currently returns `a` unchanged; `b` is accepted but not used.
    fn my_black_box_function(a: f64, b: f64) -> f64 {
        // `b` is deliberately ignored until real logic is filled in.
        let _ = b;
        a
    }
    
    /// Applies `my_black_box_function` row by row to the `"a"` and `"b"`
    /// fields of the struct column `struct_col`, then collects the lazy query.
    ///
    /// A row yields a null whenever either of its two field values is null.
    ///
    /// # Errors
    ///
    /// Propagates any error raised inside the mapping closure (struct cast,
    /// field lookup by name, `f64` cast) as well as errors from `collect`.
    fn apply_multiples(lf: LazyFrame) -> Result<DataFrame> {
        lf.select([col("struct_col").map(
            |s| {
                let ca = s.struct_()?;

                // Bind each field to the variable of the same name so the
                // values reach `my_black_box_function` in (a, b) order.
                let a = ca.field_by_name("a")?;
                let b = ca.field_by_name("b")?;
                let a = a.f64()?;
                let b = b.f64()?;

                let out: Float64Chunked = a
                    .into_iter()
                    .zip(b.into_iter())
                    .map(|(opt_a, opt_b)| match (opt_a, opt_b) {
                        (Some(a), Some(b)) => Some(my_black_box_function(a, b)),
                        // Any missing operand makes the whole row null.
                        _ => None,
                    })
                    .collect();

                Ok(out.into_series())
            },
            // Tell the query planner the output column is Float64.
            GetOutput::from_type(DataType::Float64),
        )])
        .collect()
    }
    
    fn main() {
        // Plain two-column DataFrame used as the starting point.
        let frame = df![
            "a" => [1.0, 2.0, 3.0],
            "b" => [3.0, 5.1, 0.3]
        ]
        .unwrap();

        // Pack the frame's columns into one struct column, then wrap that
        // column in a fresh DataFrame and switch to the lazy API.
        let struct_col = frame.into_struct("StructChunked");
        let lf = df![
            "struct_col" => struct_col
        ]
        .unwrap()
        .lazy();

        // Report success or print the error; the resulting frame is not inspected.
        match apply_multiples(lf) {
            Err(e) => println!("{:?}", e),
            Ok(_) => println!("We did it"),
        }
    }
    

    Here is a version for my initial question (String):

    use polars::prelude::*;
    
    /// Builds the sentence "`<fruit>` has `<color>` color" from the two inputs.
    fn my_fruit_box(fruit: String, color: String) -> String {
        // Borrow both inputs as string slices before formatting.
        let (fruit_name, color_name): (&str, &str) = (&fruit, &color);
        format!("{} has {} color", fruit_name, color_name)
    }
    
    /// Combines the `"Fruit"` and `"Color"` fields of the struct column
    /// `struct_col` into one string column via `my_fruit_box`, then collects
    /// the lazy query.
    ///
    /// A row yields a null whenever either of its two field values is null.
    /// Errors from the mapping closure and from `collect` are returned to the
    /// caller.
    fn apply_multiples(lf: LazyFrame) -> Result<DataFrame> {
        lf.select([col("struct_col").map(
            |s| {
                let fields = s.struct_()?;

                let fruit_series = fields.field_by_name("Fruit")?;
                let color_series = fields.field_by_name("Color")?;
                let colors = color_series.utf8()?;
                let fruits = fruit_series.utf8()?;

                let combined: Utf8Chunked = fruits
                    .into_iter()
                    .zip(colors.into_iter())
                    .map(|(maybe_fruit, maybe_color)| {
                        // Only rows with both values present produce a sentence.
                        maybe_fruit
                            .zip(maybe_color)
                            .map(|(f, c)| my_fruit_box(f.to_string(), c.to_string()))
                    })
                    .collect();

                Ok(combined.into_series())
            },
            // Tell the query planner the output column is Utf8.
            GetOutput::from_type(DataType::Utf8),
        )])
        .collect()
    }
    
    fn main() {
        // Plain two-column DataFrame of strings used as the starting point.
        let columns = vec![
            Series::new("Fruit", &["Apple", "Apple", "Pear"]),
            Series::new("Color", &["Red", "Yellow", "Green"]),
        ];
        let frame = DataFrame::new(columns).unwrap();

        // Pack the frame's columns into one struct column, then wrap that
        // column in a fresh DataFrame and switch to the lazy API.
        let struct_col = frame.into_struct("StructChunked");
        let lf = df![
            "struct_col" => struct_col
        ]
        .unwrap()
        .lazy();

        // Report success or print the error; the resulting frame is not inspected.
        match apply_multiples(lf) {
            Err(e) => println!("{:?}", e),
            Ok(_) => println!("We did it"),
        }
    }