Search code examples
jsonmongodbelasticsearchaggregation-frameworkalgolia

MongoDb Aggregation Data manipulation - Objects to Arrays


I have the following example set of data

[{
  "_id": {
    "$oid": "60f83d3cd66842301905aa77"
  },
  "id": 527438,
  "name": "CryptoPunk #4050",
  "asset_contract": {
    "name": "CryptoPunks",
    "address": "0xb47e3cd837ddf8e4c57f05d70ab865de6e193bbb"
  },
  "traits": [
    {
      "trait_type": "type",
      "value": "Male",
      "display_type": null,
      "max_value": null,
      "trait_count": 6039,
      "order": null
    },
    {
      "trait_type": "accessory",
      "value": "Mohawk",
      "display_type": null,
      "max_value": null,
      "trait_count": 441,
      "order": null
    },
    {
      "trait_type": "accessory",
      "value": "Earring",
      "display_type": null,
      "max_value": null,
      "trait_count": 2459,
      "order": null
    },
    {
      "trait_type": "accessory",
      "value": "Frown",
      "display_type": null,
      "max_value": null,
      "trait_count": 261,
      "order": null
    }
  ],
  "token_id": "4050",
  "permalink": "https://opensea.io/assets/0xb47e3cd837ddf8e4c57f05d70ab865de6e193bbb/4050",
  "background_color": null,
  "image_url": "https://lh3.googleusercontent.com/sO18rDQYhC5yIcj12RVsv31pbbsZo_2muQQbTJMQHn47EKGhnirs8mxzohm58HAZ7taBoe4pU6x1qntlExk_TtJ-",
  "image_preview_url": "https://lh3.googleusercontent.com/sO18rDQYhC5yIcj12RVsv31pbbsZo_2muQQbTJMQHn47EKGhnirs8mxzohm58HAZ7taBoe4pU6x1qntlExk_TtJ-=s250",
  "animation_url": null,
  "vault_contract": "0x269616d549d7e8eaa82dfb17028d0b212d11232a"
},{
  "_id": { "$oid": "60f83d3cbc3f0161da2141f7" },
  "id": 17736625,
  "name": "OJ Simpson",
  "asset_contract": {
    "name": "Hashmasks",
    "address": "0xc2c747e0f7004f9e8817db2ca4997657a7746928"
  },
  "traits": [
    {
      "trait_type": "Character",
      "value": "Male",
      "display_type": null,
      "max_value": null,
      "trait_count": 8659,
      "order": null
    },
    {
      "trait_type": "Mask",
      "value": "Doodle",
      "display_type": null,
      "max_value": null,
      "trait_count": 2187,
      "order": null
    },
    {
      "trait_type": "Eye Color",
      "value": "Dark",
      "display_type": null,
      "max_value": null,
      "trait_count": 7419,
      "order": null
    },
    {
      "trait_type": "Item",
      "value": "No Item",
      "display_type": null,
      "max_value": null,
      "trait_count": 14533,
      "order": null
    },
    {
      "trait_type": "Skin Color",
      "value": "Dark",
      "display_type": null,
      "max_value": null,
      "trait_count": 3784,
      "order": null
    },
    {
      "trait_type": "Token ID",
      "value": 3535,
      "display_type": "number",
      "max_value": null,
      "trait_count": 0,
      "order": null
    },
    {
      "trait_type": "Background",
      "value": "Doodle",
      "display_type": null,
      "max_value": null,
      "trait_count": 5538,
      "order": null
    }
  ],
  "token_id": "3535",
  "permalink": "https://opensea.io/assets/0xc2c747e0f7004f9e8817db2ca4997657a7746928/3535",
  "background_color": null,
  "image_url": "https://lh3.googleusercontent.com/NZQu7CNjgJ_1uhbUVwEb-14rZPJmPCaqaXy0qnUpgm5Qll0BvmmF7tPMjBhFH6ZZp_qzOPxHi0NFmRkOjHoBQ0BODcWI8NlyBXLu",
  "image_preview_url": "https://lh3.googleusercontent.com/NZQu7CNjgJ_1uhbUVwEb-14rZPJmPCaqaXy0qnUpgm5Qll0BvmmF7tPMjBhFH6ZZp_qzOPxHi0NFmRkOjHoBQ0BODcWI8NlyBXLu=s250",
  "animation_url": null,
  "vault_contract": "0xc7a8b45e184138114e6085c82936a8db93dd156a"
}]

which I would like to be updated to

[{
    "_id": {
      "$oid": "60f83d3cd66842301905aa77"
    },
    "id": 527438,
    "name": "CryptoPunk #4050",
    "asset_contract": {
      "name": "CryptoPunks",
      "address": "0xb47e3cd837ddf8e4c57f05d70ab865de6e193bbb"
    },
    "traits":
      {
        "type": "Male",
        "accessory": ["Mohawk", "Earing", "Frown"], 
      },
    "token_id": "4050",
    "permalink": "https://opensea.io/assets/0xb47e3cd837ddf8e4c57f05d70ab865de6e193bbb/4050",
    "background_color": null,
    "image_url": "https://lh3.googleusercontent.com/sO18rDQYhC5yIcj12RVsv31pbbsZo_2muQQbTJMQHn47EKGhnirs8mxzohm58HAZ7taBoe4pU6x1qntlExk_TtJ-",
    "image_preview_url": "https://lh3.googleusercontent.com/sO18rDQYhC5yIcj12RVsv31pbbsZo_2muQQbTJMQHn47EKGhnirs8mxzohm58HAZ7taBoe4pU6x1qntlExk_TtJ-=s250",
    "animation_url": null,
    "vault_contract": "0x269616d549d7e8eaa82dfb17028d0b212d11232a"
  },{
  "_id": { "$oid": "60f83d3cbc3f0161da2141f7" },
  "id": 17736625,
  "name": "OJ Simpson",
  "asset_contract": {
    "name": "Hashmasks",
    "address": "0xc2c747e0f7004f9e8817db2ca4997657a7746928"
  },
  "traits": {
      "character": "Male",
      "mask": "Doodle",
      "eye_color": "Dark",
      "item": "No Item",
      "skin_color": "Dark",
      "token_id": 3535,
      "background": "Doodle",
    },
  "token_id": "3535",
  "permalink": "https://opensea.io/assets/0xc2c747e0f7004f9e8817db2ca4997657a7746928/3535",
  "background_color": null,
  "image_url": "https://lh3.googleusercontent.com/NZQu7CNjgJ_1uhbUVwEb-14rZPJmPCaqaXy0qnUpgm5Qll0BvmmF7tPMjBhFH6ZZp_qzOPxHi0NFmRkOjHoBQ0BODcWI8NlyBXLu",
  "image_preview_url": "https://lh3.googleusercontent.com/NZQu7CNjgJ_1uhbUVwEb-14rZPJmPCaqaXy0qnUpgm5Qll0BvmmF7tPMjBhFH6ZZp_qzOPxHi0NFmRkOjHoBQ0BODcWI8NlyBXLu=s250",
  "animation_url": null,
  "vault_contract": "0xc7a8b45e184138114e6085c82936a8db93dd156a"
}]

The logic behind it would be

  • Look at the Traits array objects
  • get the trait_type value and create a new key using the lower case name (underscores for spaces)
  • Set the value of the new key to be the value of "value"

So,

      "trait_type": "type",
      "value": "Male",

//becomes

            "type": "Male"
  • Where there are multiple instances of the same trait type, create an array of values.

So,

{
      "trait_type": "accessory",
      "value": "Mohawk",
      "display_type": null,
      "max_value": null,
      "trait_count": 441,
      "order": null
    },
    {
      "trait_type": "accessory",
      "value": "Earring",
      "display_type": null,
      "max_value": null,
      "trait_count": 2459,
      "order": null
    },

// becomes

            "accessory": ["Mohawk", "Earring"]

Solution

  • Query

    • its an aggregation update even if pipeline doesn't allow us to use stages like group lookup etc that here is used. (you can use $out and replace collection after or $merge to replace documents(similar to update))

    • first map

      • for each trait(document member of traits), it makes it into array
        [["trait_type": "type"] ["value": "Male"] ["display_type": null] ...]
      • reduce on that array to contruct from them 1 document only
        {"type" "type","value" :"Male"} (does also that lowercase and "_")
    • Now traits its like

      "traits": [
        {
          "type": "type",
          "value": "Male"
        },
        {
          "type": "accessory",
          "value": "Mohawk"
        },
        {
          "type": "accessory",
          "value": "Earring"
        },
        {
          "type": "accessory",
          "value": "Frown"
        }
      ]
      
    • lookup with the dummy collection [{}] (we do that to make a group inside that array) its like a trick that allows us to use stage operators inside 1 document

      • lookup pipeline unwinds and groups by type
      "traits": [
        {
          "values": [
            "Mohawk",
            "Earring",
            "Frown"
          ],
          "type": "accessory"
        },
        {
          "values": [
            "Male"
          ],
          "type": "type"
        }
      ]
      
      • then its a replace root to do take the value of type, make it the field-name and the values as value (if size=1 removes the array)
    • After lookup we have

      "traits": [
        {
          "accessory": [
            "Mohawk",
            "Earring",
            "Frown"
          ]
        },
        {
          "type": "Male"
        }
      ]
      
    • so all we have to do is to reduce that traits and merge the objects (keys are unique anyways because we grouped by them)

    • and we get the expected output (at least i think its ok)

    Test code here

    db.collection.aggregate([
      {
        "$set": {
          "traits": {
            "$map": {
              "input": "$traits",
              "as": "t",
              "in": {
                "$reduce": {
                  "input": {
                    "$map": {
                      "input": {
                        "$objectToArray": "$$t"
                      },
                      "as": "m",
                      "in": [
                        "$$m.k",
                        "$$m.v"
                      ]
                    }
                  },
                  "initialValue": {},
                  "in": {
                    "$let": {
                      "vars": {
                        "type_value": "$$value",
                        "ta": "$$this"
                      },
                      "in": {
                        "$let": {
                          "vars": {
                            "key": {
                              "$arrayElemAt": [
                                "$$ta",
                                0
                              ]
                            },
                            "value": {
                              "$arrayElemAt": [
                                "$$ta",
                                1
                              ]
                            }
                          },
                          "in": {
                            "$switch": {
                              "branches": [
                                {
                                  "case": {
                                    "$eq": [
                                      "$$key",
                                      "value"
                                    ]
                                  },
                                  "then": {
                                    "$mergeObjects": [
                                      "$$type_value",
                                      {
                                        "value": "$$value"
                                      }
                                    ]
                                  }
                                },
                                {
                                  "case": {
                                    "$eq": [
                                      "$$key",
                                      "trait_type"
                                    ]
                                  },
                                  "then": {
                                    "$mergeObjects": [
                                      "$$type_value",
                                      {
                                        "type": {
                                          "$replaceAll": {
                                            "input": {
                                              "$toLower": "$$value"
                                            },
                                            "find": " ",
                                            "replacement": "_"
                                          }
                                        }
                                      }
                                    ]
                                  }
                                }
                              ],
                              "default": "$$type_value"
                            }
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      },
      {
        "$lookup": {
          "from": "dummy",
          "let": {
            "traits": "$traits"
          },
          "pipeline": [
            {
              "$set": {
                "traits": "$$traits"
              }
            },
            {
              "$unwind": {
                "path": "$traits"
              }
            },
            {
              "$replaceRoot": {
                "newRoot": "$traits"
              }
            },
            {
              "$group": {
                "_id": "$type",
                "values": {
                  "$push": "$value"
                }
              }
            },
            {
              "$set": {
                "type": "$_id"
              }
            },
            {
              "$project": {
                "_id": 0
              }
            },
            {
              "$replaceRoot": {
                "newRoot": {
                  "$cond": [
                    {
                      "$eq": [
                        {
                          "$size": "$values"
                        },
                        1
                      ]
                    },
                    {
                      "$arrayToObject": {
                        "$let": {
                          "vars": {
                            "pair": [
                              [
                                "$type",
                                {
                                  "$arrayElemAt": [
                                    "$values",
                                    0
                                  ]
                                }
                              ]
                            ]
                          },
                          "in": "$$pair"
                        }
                      }
                    },
                    {
                      "$arrayToObject": {
                        "$let": {
                          "vars": {
                            "pair": [
                              [
                                "$type",
                                "$values"
                              ]
                            ]
                          },
                          "in": "$$pair"
                        }
                      }
                    }
                  ]
                }
              }
            }
          ],
          "as": "traits"
        }
      },
      {
        "$set": {
          "traits": {
            "$mergeObjects": "$traits"
          }
        }
      }
    ])