Search code examples
rust, functional-programming

Optimized Word Count


What can be done to make the following word-count run even faster? The input text will always be in upper case.

use std::fs;
use std::env;
use std::collections::HashMap;

/// Counts how many times each whitespace-separated word occurs in `s`.
///
/// Every word is copied into an owned `String` key, so the returned map does
/// not borrow from `s`. An empty or all-whitespace input yields an empty map.
fn word_count(s: &str) -> HashMap<String, u32> {
    let mut counts = HashMap::new();
    for word in s.split_whitespace() {
        // One owned key is built per word, whether or not it was seen before.
        let key = word.to_string();
        let tally = counts.entry(key).or_insert(0);
        *tally += 1;
    }
    counts
}

/// Reads the file named by the first command-line argument and counts its words.
///
/// Prints a usage message to stderr and exits with status 1 when no file
/// argument is supplied. Panics with "Unable to read file" if the file cannot
/// be read as UTF-8 text.
fn main() {
    // Take the first real argument directly; indexing a collected `Vec` with
    // `[1]` would panic with an unhelpful out-of-bounds message when it is missing.
    let fil = match env::args().nth(1) {
        Some(path) => path,
        None => {
            eprintln!("Usage: <program> <FILE>");
            std::process::exit(1);
        }
    };

    println!("Will count words in the following file: {:?}", fil);

    let data = fs::read_to_string(&fil).expect("Unable to read file");
    // The result is intentionally left unprinted so that timing measures the
    // counting itself; uncomment the line below to inspect the counts.
    let wc = word_count(&data);

    //println!("{:?}", wc);
}

Solution

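  • The biggest win is to avoid allocating a `String` for every word: use `&str` slices borrowed from the input as the map keys instead. The returned map then borrows from `s`, so the input text must outlive it.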

    /// Counts how many times each whitespace-separated word occurs in `s`.
    ///
    /// Keys are `&str` slices borrowed from `s`, so no `String` is allocated
    /// per word and the returned map cannot outlive the input text.
    fn word_count(s: &str) -> HashMap<&str, u32> {
        let mut counts = HashMap::new();
        for word in s.split_whitespace() {
            let tally = counts.entry(word).or_insert(0);
            *tally += 1;
        }
        counts
    }
    

    Also remember to build and run with --release (for example, cargo run --release) when judging speed; debug builds are not optimized.


    Edit: if you want or need to keep String keys, it will probably be faster to create the String only when the entry is missing. Replace the body of the fold closure with:

    // Look the word up through the borrowed `&str` first, so the common
    // "already counted" path never builds a `String`.
    if let Some(entry) = h.get_mut(w) {
        *entry += 1;
    } else {
        // First occurrence: only now create the owned key. This path does a
        // second lookup (`get_mut` above, then `insert`).
        h.insert(w.to_string(), 1);
    }
    // Trailing expression: yields the map, e.g. as the accumulator of a `fold` closure.
    h