Search code examples
vectorrusthashmapbytebuffer

Flatten a Map<Vec<u8>, Vec<u8>> into a Vec<u8> and then return it to a Map<Vec<u8>, Vec<u8>>


I have data in a HashMap<Vec<u8>, Vec<u8>> and I want to write that data to a file as a byte buffer (a single Vec<u8>), then read it back from the file and reconstruct the HashMap structure.

Is there an established algorithm for flattening and recovering maps like this? I could write metadata into the file to mark where each piece of data begins and ends, etc. I can't use structured serialization because of the nature of this project — I am encrypting the data and the file.


Solution

  • You may store this with the following format:

    key1_len | key1_bytes | value1_len | value1_bytes | key2_len | key2_bytes | value2_len | value2_bytes | ...

    which can be done fairly easily with the standard library (playground):

    use std::collections::HashMap;
    use std::convert::TryInto;
    
    /// Flattens `map` into a single byte buffer.
    ///
    /// Every entry is emitted as `key_len | key_bytes | value_len | value_bytes`.
    /// Both lengths are `usize` values in little-endian byte order, so the width
    /// of a length prefix follows the pointer size of the target this runs on.
    /// Entries are written in the map's iteration order.
    fn serialize(map: &HashMap<Vec<u8>, Vec<u8>>) -> Vec<u8> {
        let mut buffer = Vec::new();

        for (key, value) in map {
            // Length prefix first, so the reader knows how many bytes to take.
            buffer.extend_from_slice(&key.len().to_le_bytes());
            buffer.extend_from_slice(key);
            buffer.extend_from_slice(&value.len().to_le_bytes());
            buffer.extend_from_slice(value);
        }

        buffer
    }
    
    /// Reads one length-prefixed byte string from the front of `input`.
    ///
    /// The prefix is a little-endian `usize` giving the payload length. On
    /// return, `input` has been advanced past both the prefix and the payload,
    /// and the payload is returned as an owned `Vec<u8>`.
    ///
    /// # Panics
    ///
    /// Panics if `input` is shorter than the prefix, or if fewer payload bytes
    /// remain than the prefix announces.
    fn read_vec(input: &mut &[u8]) -> Vec<u8> {
        const PREFIX_LEN: usize = std::mem::size_of::<usize>();

        let (prefix, after_prefix) = input.split_at(PREFIX_LEN);
        // `prefix` is exactly PREFIX_LEN bytes long, so this conversion cannot fail.
        let prefix: [u8; PREFIX_LEN] = prefix.try_into().unwrap();
        let (payload, remainder) = after_prefix.split_at(usize::from_le_bytes(prefix));

        *input = remainder;
        payload.to_vec()
    }
    
    /// Rebuilds a map from a buffer of back-to-back length-prefixed records.
    ///
    /// The buffer is consumed as alternating key and value records, each read
    /// with `read_vec`, until no bytes remain. An empty buffer yields an empty
    /// map. If the same key occurs more than once, the last value read wins.
    ///
    /// Accepts any byte slice; a `&Vec<u8>` argument coerces automatically.
    ///
    /// # Panics
    ///
    /// Panics if the buffer is truncated or otherwise malformed, because
    /// `read_vec` panics when it cannot read a complete record.
    fn deserialize(bytes: &[u8]) -> HashMap<Vec<u8>, Vec<u8>> {
        let mut map = HashMap::new();

        // `left` is a shrinking view of the unread tail of the buffer.
        let mut left = bytes;
        while !left.is_empty() {
            let k = read_vec(&mut left);
            let v = read_vec(&mut left);
            map.insert(k, v);
        }

        map
    }
    
    /// Round-trip demonstration: builds a small map, flattens it with
    /// `serialize`, rebuilds it with `deserialize`, and asserts that the two
    /// maps are equal.
    fn main() {
        let original: HashMap<Vec<u8>, Vec<u8>> = vec![
            (vec![1, 2, 3], vec![4, 5, 6]),
            (vec![4, 5, 6], vec![1, 2, 3]),
            (vec![1, 5, 3], vec![4, 2, 6]),
        ]
        .into_iter()
        .collect();

        let flattened = serialize(&original);
        let restored = deserialize(&flattened);

        assert_eq!(original, restored);
    }