Search code examples
csv, rust, serde

How do I input trimmed data into a csv reader in Rust?


I am quite new to Rust, and I am trying to use the csv and serde crates to read a .csv file.

The issue I am having is that the csv file has some junk at the front, so I need to trim that off before I can read the data that I want.

I do not get any compilation errors, but I also do not get any output from the csv deserialization block.

I know that in my code, the data is fine right up until I create the csv reader.

use std::{error::Error, process};
use std::io::Cursor;

use serde::Deserialize;

/// One data row of the CSV input, filled in by serde deserialization.
///
/// The fields are named after the columns of the sample header row
/// (`beans,rice,jesus,christ`); every column is kept as raw text.
#[derive(Debug, Deserialize)]

struct Record {
    // First column of a row.
    beans: String,
    // Second column of a row.
    rice: String,
    // Third column of a row.
    jesus: String,
    // Fourth column of a row.
    christ: String,
}
/// Drops the first 8 lines of the embedded sample text, joins what is left
/// with commas, and prints every `Record` the csv reader deserializes from it.
///
/// Exits the process with status 1 when the text has fewer than 8 lines;
/// deserialization errors are returned to the caller.
fn run() -> Result<(), Box<dyn Error>> {
    let data = "
   foobar
   foo,bar,boo,far
   
   
   
   
   boofar
   beans,rice,jesus,christ
   thing,thing2,thing3,thing4
   thing5,thing6,thing7,thing8
   ";

    let mut lines = data.lines();

    // Consume the 8 leading lines, bailing out if the input runs dry first.
    for _ in 0..8 {
        if lines.next().is_none() {
            eprintln!("File has fewer than 8 lines");
            process::exit(1);
        }
    }

    // Echo each surviving line for debugging while gathering it as an owned String.
    let rows: Vec<String> = lines
        .inspect(|line| println!("test {:?}", line))
        .map(String::from)
        .collect();

    // Glue the gathered lines together with commas.
    let joined = rows.join(",");

    // Field trimming (`Trim::Fields`) is deliberately left switched off here.
    let mut reader = csv::ReaderBuilder::new().from_reader(Cursor::new(joined));

    // Deserialize and print each record; the first failure is propagated.
    for entry in reader.deserialize() {
        let record: Record = entry?;
        println!("{:?}", record);
    }

    Ok(())
}

/// Entry point: runs the CSV demo and, if it fails, prints the error and
/// terminates with exit status 1.
fn main() {
    match run() {
        Ok(()) => {}
        Err(err) => {
            println!("{}", err);
            process::exit(1);
        }
    }
}

Solution

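  • You joined all the remaining lines with `,`, which concatenates every row into a single line. The csv reader treats that one line as the header row, so there are no data records left for the deserializer to yield. Keep the line breaks between the rows instead.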

    Example:

    use std::error::Error;
    use std::io::Cursor;
    
    use serde::Deserialize;
    
    /// One data row of the CSV input, filled in by serde deserialization.
    ///
    /// The fields are named after the columns of the sample header row
    /// (`beans,rice,jesus,christ`); every column is kept as raw text.
    #[derive(Debug, Deserialize)]
    struct Record {
        // First column of a row.
        beans: String,
        // Second column of a row.
        rice: String,
        // Third column of a row.
        jesus: String,
        // Fourth column of a row.
        christ: String,
    }
    
    /// Skips the junk in front of the CSV section of the sample text, then
    /// deserializes and prints every remaining row as a `Record`.
    ///
    /// # Errors
    ///
    /// Returns an error when the text contains fewer than 8 line breaks or
    /// when a row cannot be deserialized.
    fn main() -> Result<(), Box<dyn Error>> {
        let data = "
       foobar
       foo,bar,boo,far
       
       
       
       
       boofar
       beans,rice,jesus,christ
       thing,thing2,thing3,thing4
       thing5,thing6,thing7,thing8
       ";

        // Byte offset of the 8th line break; everything before it is junk.
        // Too-short input is reported as an error rather than a panic, since
        // `main` already returns a `Result`.
        let (skip, _) = data
            .char_indices()
            .filter(|&(_, ch)| ch == '\n')
            .nth(7)
            .ok_or("no 8 lines")?;
        // `trim` drops the line break in front of the header row and the
        // whitespace after the last row, but keeps the line breaks that
        // separate the rows from each other.
        let csv_data = data[skip..].trim();

        // Create CSV reader from the remaining lines
        let mut rdr = csv::ReaderBuilder::new()
            // .trim(Trim::Fields)
            .from_reader(Cursor::new(csv_data));

        // Iterate over CSV records and process them
        for result in rdr.deserialize() {
            let mut record: Record = result?;

            // Remove the leading indentation. Only the whitespace that is
            // actually present is stripped, so a short or unindented first
            // field can neither panic nor lose real characters.
            let indent = record.beans.len() - record.beans.trim_start().len();
            record.beans.replace_range(..indent, "");

            println!("{:?}", record);
        }

        Ok(())
    }