Search code examples
rust, rust-polars

CSV input redirection through CLI


I'm trying to load CSV input, redirected through the CLI, into a Polars (Rust) DataFrame, but I cannot get it to work.

This is my current attempt, which fails:

use std::io::{Stdin, stdin};
use polars::prelude::*;
/// Attempts to build a frame from the stdin handle via `CsvReader::from`, returning the
/// `Ok` value or panicking with the error.
///
/// NOTE(review): this is the attempt the question reports as failing.
fn load_file(file: Stdin) -> LazyFrame {
    let lazy_df = CsvReader::from(file);

    // Treats the result as a `Result`: unwrap on success, panic with the error otherwise.
    match lazy_df {
        Ok(good_df) => good_df,
        Err(error) => panic!("{}", error),
    }
}

/// Reads all of standard input into a `String`, then passes the stdin handle to `load_file`.
fn main() {
    // NOTE(review): the `use` lines above import only `std::io::{Stdin, stdin}`, so the `io::`
    // path and the `Read` trait needed by `read_to_string` do not appear to be in scope — confirm.
    let reader = io::stdin();
    let mut buffer = String::new();
    reader
        .lock()
        .read_to_string(&mut buffer)
        .expect("Failed in reading the file");

    // NOTE(review): stdin has already been read to the end into `buffer` (which is not used
    // afterwards), so `load_file` presumably receives a handle with nothing left to read.
    let df = load_file(reader);
}

exec: cargo run < data/play_data.csv


Solution

  • I've found a workaround using Cursor: Polars provides an implementation of MmapBytesReader for Cursor<T> (impl<T> MmapBytesReader for Cursor<T>).

    First, read the entire Stdin into a vector, then wrap the vector in a cursor. Finally, convert the cursor into the type Polars expects.

    use std::io::{Read, Stdin};
    use polars::io::mmap::MmapBytesReader;
    use polars::prelude::*;
    
    /// Reads CSV data from `reader` until EOF, loads it into a Polars `DataFrame`, and prints
    /// the result of `mean()` over the rows whose `Id` value is at least 100.
    ///
    /// # Panics
    ///
    /// Panics if stdin cannot be read, if the CSV cannot be loaded, if the `Id` column lookup
    /// or its `i64` conversion fails, or if the filter fails.
    fn load_file(reader: &Stdin) -> () {
        // Slurp all of stdin first: the CSV reader below is fed from an in-memory cursor.
        let mut bytes: Vec<u8> = Vec::new();
        reader
            .lock()
            .read_to_end(&mut bytes)
            .expect("cannot read from stdin");

        // Wrap the bytes in a cursor and box it as the trait object handed to `CsvReader`.
        let source: Box<dyn MmapBytesReader> = Box::new(std::io::Cursor::new(bytes));

        let df = CsvReader::new(source)
            .with_delimiter(b',')
            .has_header(true)
            .finish()
            .unwrap_or_else(|e| panic!("cannot load data frame: {}", e));

        // Keep only rows with `Id >= 100`, then aggregate what is left.
        let ids = df.column("Id").unwrap().i64().unwrap();
        let mask = ids.gt_eq(100);
        let filter = df.filter(&mask).unwrap().mean();

        println!("filter: {filter}");
    }
    
    /// Entry point: passes the process's standard input handle to `load_file`.
    fn main() {
        load_file(&std::io::stdin());
    }