Search code examples
rust rust-polars

How to parse date strings with days and months without zero padding in the Rust version of Polars?


I am reading a CSV file with dates in month/day/year format (e.g. "11/15/2022"), but the month and day are not zero-padded. The following is my test code:

use polars::prelude::*;
use polars_lazy::prelude::*;

/// Reproduces the problem: parsing non-zero-padded `month/day/year` strings
/// with the `%m/%d/%Y` format leaves the `new time` column filled with nulls.
fn main() {
    let frame = df![
        "x" => ["1/4/2011", "2/4/2011", "3/4/2011", "4/4/2011"],
        "y" => [1, 2, 3, 4],
    ]
    .unwrap();
    let lazy_frame: LazyFrame = frame.lazy();

    // Target a `Date` column; every option not listed keeps its default value.
    let options = StrpTimeOptions {
        fmt: Some("%m/%d/%Y".into()),
        date_dtype: DataType::Date,
        ..Default::default()
    };

    // Build the parsing expression first, then attach it as a new column.
    let parsed = col("x").str().strptime(options).alias("new time");
    let res = lazy_frame.clone().with_column(parsed).collect().unwrap();

    println!("{:?}", res);
}

The output is

shape: (4, 3)
┌──────────┬─────┬──────────┐
│ x        ┆ y   ┆ new time │
│ ---      ┆ --- ┆ ---      │
│ str      ┆ i32 ┆ date     │
╞══════════╪═════╪══════════╡
│ 1/4/2011 ┆ 1   ┆ null     │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ 2/4/2011 ┆ 2   ┆ null     │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ 3/4/2011 ┆ 3   ┆ null     │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ 4/4/2011 ┆ 4   ┆ null     │
└──────────┴─────┴──────────┘

In the options I tried "%-m/%-d/%Y" instead of "%m/%d/%Y", as mentioned in the documentation, but it panicked at runtime:

thread '<unnamed>' panicked at 'attempt to subtract with overflow', /home/xxx/.cargo/registry/src/github.com-1ecc6299db9ec823/polars-time-0.21.1/src/chunkedarray/utf8/mod.rs:234:33

What is the correct way to read this format? I am using Ubuntu 20.04.4 LTS.


Solution

  • The `..Default::default()` in your options is making it run with the wrong flags. You need to set `exact` to `true`:

    ...
        // Non-padded month/day format, with `exact` set explicitly instead of
        // being left to `Default::default()`.
        let options = StrpTimeOptions {
            fmt: Some("%-m/%-d/%Y".into()),
            date_dtype: DataType::Date,
            exact: true,
            ..Default::default()
        };
    ...
    

    Full code, tested with a zero-padded value included:

    use polars::prelude::*;
    use polars_lazy::dsl::StrpTimeOptions;
    use polars_lazy::prelude::{col, IntoLazy, LazyFrame};
    
    /// Parses `month/day/year` strings into a `Date` column named `new time`
    /// and prints the resulting frame. The input mixes one zero-padded value
    /// with non-padded ones.
    fn main() {
        let frame = df![
            "x" => ["01/04/2011", "2/4/2011", "3/4/2011", "4/4/2011"],
            "y" => [1, 2, 3, 4],
        ]
        .unwrap();
        let lazy_frame: LazyFrame = frame.lazy();

        // `exact` is set explicitly; the remaining options keep their defaults.
        let options = StrpTimeOptions {
            fmt: Some("%-m/%-d/%Y".into()),
            date_dtype: DataType::Date,
            exact: true,
            ..Default::default()
        };

        // Build the parsing expression first, then attach it as a new column.
        let parsed = col("x").str().strptime(options).alias("new time");
        let res = lazy_frame.clone().with_column(parsed).collect().unwrap();

        println!("{:?}", res);
    }
    
    

    Outputs:

    shape: (4, 3)
    ┌────────────┬─────┬────────────┐
    │ x          ┆ y   ┆ new time   │
    │ ---        ┆ --- ┆ ---        │
    │ str        ┆ i32 ┆ date       │
    ╞════════════╪═════╪════════════╡
    │ 01/04/2011 ┆ 1   ┆ 2011-01-04 │
    ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 2/4/2011   ┆ 2   ┆ 2011-02-04 │
    ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 3/4/2011   ┆ 3   ┆ 2011-03-04 │
    ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 4/4/2011   ┆ 4   ┆ 2011-04-04 │
    └────────────┴─────┴────────────┘