Search code examples
rustdeserializationserde-json

Deserialising this kind of nested JSON structure delivered by reqwest


Someone may well point to a duplicate of this. But I have searched. There are lots of questions on the subject, but none I've found really seems to help.

The structure of my delivered JSON (from Elasticsearch) is like this:

When I go:

let json_hashmap: HashMap<String, Value> = serde_json::from_str(&text).unwrap();

I get this:

{
    "hits": Object {
        "hits": Array [
            Object {
                "_id": String("Ybgt6ooBpXznUptX4lR2"),
                "_index": String("booby"),
                "_score": Number(1.0),   
                ... (other keys)
            },
            ... (multiple other hit-hit Objects in the Array)
         ],
         ... (other keys)
    },
    "key2": String("my arbitrary string value"),
    "key3": Number(1.0),
    ... (other keys)
}

It's not that the keys and nested array of hit-hit "dictionaries" are unpredictable. I can devise a struct for these hit-hit dictionaries. But how can I actually obtain the array of hit-hits?

Because as can be seen, the level 0 dictionary (outer dictionary) has ONE key which decodes to an inner dictionary ("level 1") containing an array of inner-inner dictionaries ("level 2").

But the level 0 dictionary has keys other than "hits" which decode to other values. And indeed the level 1 dictionary may also have keys other than "hits".

Is there a way of stipulating that one particular key in the "level 0" (outer) dictionary decodes to HashMap<String, Value> and that one particular key in that dictionary decodes to Vec<MyStruct>?

NB I can get access to some object if I go

let outer_hits = &json_hashmap["hits"];
let inner_hits = &outer_hits["hits"];

Printing this using {:#?} shows this:

Array [
    Object {
        "_id": String("Ybgt6ooBpXznUptX4lR2"),
        "_index": String("booby"),
        "_score": Number(1.0),
        "_source": Object {
            "docx_id": Number(14),
            "last_modif_ms": Number(1596958203000),
            "ldoc_type": String("docx_doc"),
            "path": String("D:\\My Documents\\doc\\IT_Diary\\IT Diary $UPD 2018-03.docx"),
        },
    },
    Object {
        ...

... but, for example, although this is said to be type Array, my attempt to go inner_hits.len() won't compile: I get " ^^^ method not found in &Value". Strange kind of an "array". How can I extract something more useable?

The text string passed to serde_json::from_str at the very start is:

{
  "_scroll_id": "FGluY2x1ZGVfY29udGV4dF91dWlkDnF1ZXJ5VGhlbkZldGNoBxZQcGUxbjhnQ1RzU0M1bG1ZR05od1VBAAAAAAAAASAWbkU3ck5YUTNSazZlazJtUXctSklWQRZQcGUxbjhnQ1RzU0M1bG1ZR05od1VBAAAAAAAAASEWbkU3ck5YUTNSazZlazJtUXctSklWQRZQcGUxbjhnQ1RzU0M1bG1ZR05od1VBAAAAAAAAASIWbkU3ck5YUTNSazZlazJtUXctSklWQRZQcGUxbjhnQ1RzU0M1bG1ZR05od1VBAAAAAAAAASMWbkU3ck5YUTNSazZlazJtUXctSklWQRZQcGUxbjhnQ1RzU0M1bG1ZR05od1VBAAAAAAAAASQWbkU3ck5YUTNSazZlazJtUXctSklWQRZQcGUxbjhnQ1RzU0M1bG1ZR05od1VBAAAAAAAAASUWbkU3ck5YUTNSazZlazJtUXctSklWQRZQcGUxbjhnQ1RzU0M1bG1ZR05od1VBAAAAAAAAASYWbkU3ck5YUTNSazZlazJtUXctSklWQQ==",
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 10,
    "successful": 10,
    "skipped": 3,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 5303,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "booby",
        "_id": "Ybgt6ooBpXznUptX4lR2",
        "_score": 1,
        "_source": {
          "docx_id": 14,
          "last_modif_ms": 1596958203000,
          "ldoc_type": "docx_doc",
          "path": "D:\\My Documents\\doc\\IT_Diary\\IT Diary $UPD 2018-03.docx"
        }
      },
      {
        "_index": "booby",
        "_id": "jLgt6ooBpXznUptX4lSG",
        "_score": 1,
        "_source": {
          "docx_id": 12,
          "last_modif_ms": 1596958248000,
          "ldoc_type": "docx_doc",
          "path": "D:\\My Documents\\doc\\IT_Diary\\IT Diary $UPD 2018-01.docx"
        }
      },
      {
        "_index": "booby",
        "_id": "1bgt6ooBpXznUptX4lSX",
        "_score": 1,
        "_source": {
          "docx_id": 20,
          "last_modif_ms": 1596958053000,
          "ldoc_type": "docx_doc",
          "path": "D:\\My Documents\\doc\\IT_Diary\\IT Diary $UPD 2018-09.docx"
        }
      },
      {
        "_index": "booby",
        "_id": "7rgt6ooBpXznUptX4lSm",
        "_score": 1,
        "_source": {
          "docx_id": 21,
          "last_modif_ms": 1596958032000,
          "ldoc_type": "docx_doc",
          "path": "D:\\My Documents\\doc\\IT_Diary\\IT Diary $UPD 2018-10.docx"
        }
      },
      {
        "_index": "booby",
        "_id": "_rgt6ooBpXznUptX4lS2",
        "_score": 1,
        "_source": {
          "docx_id": 22,
          "last_modif_ms": 1596957990000,
          "ldoc_type": "docx_doc",
          "path": "D:\\My Documents\\doc\\IT_Diary\\IT Diary $UPD 2018-11.docx"
        }
      }
    ]
  }
}

Solution

  • It is best to model your own types that reflect the expected JSON structure instead of using a generic Value. You can do that by deriving serde::Deserialize on them. Something like this (only some fields implemented):

    use serde::Deserialize;
    use serde_json::json;
    
    /// Top-level object of the JSON response shown in the question.
    ///
    /// Only the `hits` key is modelled here; the other top-level keys are
    /// listed below as not yet implemented.
    #[derive(Debug, Deserialize)]
    struct Response {
        // Top-level keys present in the sample JSON but not modelled:
        // _scroll_id
        // took
        // timed_out
        // _shards
        /// The outer `"hits"` object, which wraps the actual array of hits.
        hits: HitsResponse,
    }
    
    /// The object stored under the top-level `"hits"` key.
    ///
    /// It carries the array of individual hits under a nested key that is
    /// also called `"hits"`.
    #[derive(Debug, Deserialize)]
    struct HitsResponse {
        // Keys present in the sample JSON but not modelled:
        // total
        // max_score
        /// The inner `"hits"` array, one element per matching document.
        hits: Vec<Hit>,
    }
    
    /// One element of the inner `"hits"` array.
    ///
    /// The field names are spelled exactly like the JSON keys, leading
    /// underscore included.
    #[derive(Debug, Deserialize)]
    struct Hit {
        // Not modelled: _index
        /// The `"_id"` string of the hit.
        _id: String,
        // Not modelled: _score
        /// The nested `"_source"` object of the hit.
        _source: HitSource,
    }
    
    /// The `"_source"` object nested inside each hit.
    ///
    /// Only `path` is modelled; the other keys are listed below as not yet
    /// implemented.
    #[derive(Debug, Deserialize)]
    struct HitSource {
        // Keys present in the sample JSON but not modelled:
        // docx_id
        // last_modif_ms
        // ldoc_type
        /// The `"path"` string of the source document.
        path: String,
    }
    

    Parsing your JSON into a Response above would yield this:

    let response: Response = serde_json::from_str(&text).unwrap();
    
    Response {
        hits: HitsResponse {
            hits: [
                Hit {
                    _id: "Ybgt6ooBpXznUptX4lR2",
                    _source: HitSource {
                        path: "D:\\My Documents\\doc\\IT_Diary\\IT Diary $UPD 2018-03.docx",
                    },
                },
                Hit {
                    _id: "jLgt6ooBpXznUptX4lSG",
                    _source: HitSource {
                        path: "D:\\My Documents\\doc\\IT_Diary\\IT Diary $UPD 2018-01.docx",
                    },
                },
                Hit {
                    _id: "1bgt6ooBpXznUptX4lSX",
                    _source: HitSource {
                        path: "D:\\My Documents\\doc\\IT_Diary\\IT Diary $UPD 2018-09.docx",
                    },
                },
                Hit {
                    _id: "7rgt6ooBpXznUptX4lSm",
                    _source: HitSource {
                        path: "D:\\My Documents\\doc\\IT_Diary\\IT Diary $UPD 2018-10.docx",
                    },
                },
                Hit {
                    _id: "_rgt6ooBpXznUptX4lS2",
                    _source: HitSource {
                        path: "D:\\My Documents\\doc\\IT_Diary\\IT Diary $UPD 2018-11.docx",
                    },
                },
            ],
        },
    }
    

    playground link

    And if you're using reqwest, you can call .json() on the response directly instead of .text() or .bytes() followed by serde_json (this requires enabling reqwest's "json" feature).