Search code examples
Tags: rust, traits, serde-json

Remove duplicates from array of serde_json::Value in Rust


I have an array of generic serde_json::Value, that may also contain duplicates.

`serde_json::Value` does not implement `std::cmp::Ord`, and I cannot implement the trait for it myself because the compiler rejects that with "only traits defined in the current crate can be implemented for arbitrary types".

What is the best/fastest way to eliminate duplicates (and create a custom order) on that array?


Solution

  • You can use the newtype idiom to implement traits for types defined in another crate: wrap the value in a local struct and implement the traits on the wrapper.

    use serde_json::Value;
    use std::collections::HashSet;
    use std::hash::{Hash, Hasher};
    
    /// Newtype around a borrowed `Value` so that `Hash` and `Eq` can be implemented for it in
    /// this crate, which lets JSON values be stored as `HashSet` keys.
    struct HashableValue<'a>(&'a Value);
    
    /// Demo entry point: parses a sample document that contains repeated array elements,
    /// removes the duplicates and prints the result to stderr.
    fn main() {
        const SAMPLE: &str =
            r#"{"a":[{"a":1},{"a":1},{"b":1}],"t":[1,2,3,1,"asdf","df","asdf"]}"#;
        let parsed: Value = serde_json::from_str(SAMPLE).unwrap();
        let deduplicated = remove_duplicates(&parsed);
        eprintln!("{}", deduplicated);
    }
    
    /// Returns a deep copy of `value` in which every array, at any nesting depth, keeps only
    /// the first occurrence of each distinct element.
    ///
    /// Elements are cleaned recursively *before* they are compared, so siblings that only
    /// become equal once their own duplicates are removed (e.g. `[[1, 1], [1]]`) collapse too.
    /// The relative order of surviving elements is preserved. Object keys are kept and their
    /// values are processed recursively; scalars are cloned unchanged.
    fn remove_duplicates(value: &Value) -> Value {
        match value {
            Value::Array(items) => {
                // Clean the children first so the comparison below sees their final form.
                let mut cleaned: Vec<Value> = items.iter().map(remove_duplicates).collect();
                // `keep[i]` is true when `cleaned[i]` is the first occurrence of its value.
                // The set borrows `cleaned`, so it is confined to this scope and dropped
                // before `cleaned` is mutated.
                let keep: Vec<bool> = {
                    let mut seen = HashSet::with_capacity(cleaned.len());
                    let mut flags = Vec::with_capacity(cleaned.len());
                    for item in &cleaned {
                        // `insert` reports whether the value was new: one lookup, not two.
                        flags.push(seen.insert(HashableValue(item)));
                    }
                    flags
                };
                // `retain` visits elements once, in order, so the flags line up one-to-one.
                let mut keep = keep.into_iter();
                cleaned.retain(|_| keep.next().unwrap_or(false));
                Value::Array(cleaned)
            }
            Value::Object(obj) => Value::Object(
                obj.iter()
                    .map(|(key, child)| (key.clone(), remove_duplicates(child)))
                    .collect(),
            ),
            _ => value.clone(),
        }
    }
    
    /// Hashes the wrapped JSON value structurally so `HashableValue` can be a `HashSet` key.
    ///
    /// This must stay consistent with the `PartialEq` impl: values that compare equal have to
    /// feed identical data into the hasher.
    impl<'a> Hash for HashableValue<'a> {
        fn hash<H: Hasher>(&self, state: &mut H) {
            // Tag with the variant first so that e.g. `[]`, `{}` and `null` hash differently.
            std::mem::discriminant(self.0).hash(state);
            match self.0 {
                Value::Null => {}
                Value::Bool(b) => b.hash(state),
                Value::Number(n) => n.hash(state),
                Value::String(s) => s.hash(state),
                Value::Array(arr) => {
                    // The length prefix keeps `[[1], 2]` and `[[1, 2]]` from feeding the
                    // hasher the same sequence.
                    arr.len().hash(state);
                    for item in arr {
                        HashableValue(item).hash(state);
                    }
                }
                Value::Object(obj) => {
                    // Hash entries in sorted key order rather than iteration order. With
                    // serde_json's `preserve_order` feature, maps iterate in insertion order
                    // while equality ignores order, so equal objects must not hash differently.
                    let mut entries: Vec<_> = obj.iter().collect();
                    entries.sort_unstable_by(|a, b| a.0.cmp(b.0));
                    entries.len().hash(state);
                    for (key, child) in entries {
                        key.hash(state);
                        HashableValue(child).hash(state);
                    }
                }
            }
        }
    }
    
    /// Two wrappers are equal exactly when the JSON values they borrow are equal.
    impl<'a> PartialEq for HashableValue<'a> {
        fn eq(&self, other: &Self) -> bool {
            // Delegate to `Value`'s own structural equality.
            *self.0 == *other.0
        }
    }
    
    // Marker impl: `HashSet` requires its element type to be `Eq` in addition to `Hash`.
    impl<'a> Eq for HashableValue<'a> {}