I'm trying to read a CSV file, piped in through stdin on the command line, into a Polars (Rust) DataFrame, but I cannot get it to work.
This is my current attempt, which fails:
use std::io::{Stdin, stdin};
use polars::prelude::*;
// Attempt from the question: turn the stdin handle into a Polars CSV reader
// and return the result as a `LazyFrame`. This is the version reported as failing.
fn load_file(file: Stdin) -> LazyFrame {
// NOTE(review): presumably `CsvReader` has no `From<Stdin>` conversion, which
// would make this the line that fails — confirm against the Polars docs.
let lazy_df = CsvReader::from(file);
// The value is matched as if it were a `Result`: the `Ok` payload is returned
// as the function's `LazyFrame`, while an `Err` aborts the program via `panic!`.
match lazy_df {
Ok(good_df) => good_df,
Err(error) => panic!("{}", error),
}
}
// Entry point of the failing attempt: reads all of stdin into a `String`,
// then passes the stdin handle itself to `load_file`.
fn main() {
// The `use` line above imports only `Stdin` and `stdin` from `std::io`;
// the `io` module path used here is not itself brought into scope.
let reader = io::stdin();
let mut buffer = String::new();
// `read_to_string` is a method of the `std::io::Read` trait, which the
// `use` lines above do not import.
reader
.lock()
.read_to_string(&mut buffer)
.expect("Failed in reading the file");
// `buffer` is not used after this point; `load_file` receives the stdin
// handle, whose contents were already read into `buffer` above.
let df = load_file(reader);
}
Run with:
cargo run < data/play_data.csv
I've found a workaround using `Cursor`. Polars provides an implementation of `impl<T> MmapBytesReader for Cursor<T>`.
First, read the entire `Stdin` into a vector, then wrap that vector in a cursor. Finally, convert the cursor into the type Polars expects.
use std::io::{Read, Stdin};
use polars::io::mmap::MmapBytesReader;
use polars::prelude::*;
/// Reads CSV data from `reader`, parses it into a Polars `DataFrame`, and
/// prints the result of calling `mean()` on the rows whose `Id` value is
/// greater than or equal to 100.
///
/// The whole input is first buffered into a `Vec<u8>` and wrapped in a
/// `std::io::Cursor`, because Polars implements `MmapBytesReader` for
/// `Cursor<T>` and `CsvReader::new` is given that reader type here.
///
/// # Panics
///
/// Panics if stdin cannot be read, if the CSV cannot be parsed, if there is
/// no `Id` column, if `Id` cannot be accessed as a 64-bit integer column, or
/// if filtering the frame fails.
fn load_file(reader: &Stdin) {
    // Slurp all of stdin into memory so it can be handed to Polars as a cursor.
    let mut bytes = Vec::<u8>::new();
    reader
        .lock()
        .read_to_end(&mut bytes)
        .expect("cannot read from stdin");

    // Box the cursor as the trait object the CSV reader is constructed with.
    let file = Box::new(std::io::Cursor::new(bytes)) as Box<dyn MmapBytesReader>;
    let df = CsvReader::new(file)
        .with_delimiter(b',')
        .has_header(true)
        .finish()
        .unwrap_or_else(|e| panic!("cannot load data frame: {e}"));

    // Boolean mask selecting the rows with `Id >= 100`.
    let mask = df
        .column("Id")
        .expect("input CSV has no `Id` column")
        .i64()
        .expect("`Id` column is not a 64-bit integer column")
        .gt_eq(100);
    let filter = df
        .filter(&mask)
        .expect("cannot filter data frame by `Id`")
        .mean();
    println!("filter: {filter}");
}
/// Program entry point: obtains the process's standard input handle and
/// passes a reference to it to `load_file`.
fn main() {
    let stdin_handle = std::io::stdin();
    load_file(&stdin_handle);
}