Search code examples
mongodbsumpymongoaverageaggregation

MongoDB sum of all values in an object where we do not know the keys in the object


Example document 1

{
    "site": "xyz.com",
    "Reg_NumReviews": {
        "value1": "194",
        "value53": "3,570"
    },
    "Reg_Score": {
        "value1": "3.24",
        "value53": "1.9"
    }
}

desired document after modification

{
    "site": "xyz.com",
    "Reg_NumReviews": {
        "value1": "194",
        "value53": "3,570"
    },
    "Reg_Score": {
        "value1": "3.24",
        "value53": "1.9"
    },
    "Total_reviews" : "3764"
    "Average_score" : 1.97
    
}

Example document 2

{
    "site": "werw.com",
    "Reg_NumReviews": {
        "value1": "194",
        "value33": "2014",
        "value5": "234"
    },
    "Reg_Score": {
        "value1": "2.24",
        "value33": "3.9",
        "value33": "3",
    }
}

desired document after modification

{
    "site": "werw.com",
    "Reg_NumReviews": {
        "value1": "194",
        "value33": "2014",
        "value5": "234"
    },
    "Reg_Score": {
        "value1": "2.24",
        "value33": "3.9",
        "value5": "3",
    },
    "Total_reviews" : "2442"
    "Average_score" : "3.68"
    
}

Code I have tried is a variant of the following.

db.SiteData.update({"site": "xyz.com" }, 
[ {"$set": { "Total_reviews" : 
  {"$sum" : {"$regex" : /Reg_NumReviews.\.*/} }}}],
  {upsert:true})
    

If I was to describe it in words:

  1. find the document with the site matching the necessary pattern.
  2. check Reg_NumReviews is a key in the object. If yes, start a set operation. Sum the value of all the key-value pairs, assign as Total_reviews.
  3. moving to the next operation, which is slightly more involved. Find the matching keys in Reg_NumReviews and Reg_Scores and multiply them. Sum all these values and divide by Total_reviews, assign as Average_score.

Edit To clarify step 3 For example in Document two

The effect would be:

Average_score = 
  (   
    ( Reg_NumReviews.value1 * RegScore.value1 ) + 
    ( Reg_NumReviews.value33 * RegScore.value33 ) +
    ( Reg_NumReviews.value5 * RegScore.value5 )  
  ) /  Total_reviews 

Solution

  • If You're using Mongo version 4.4+ you can use this update which converts the fields using $objectToArray, then for each it sums them up accordingly, the reason we require v4.4 is due to the usage of $replaceAll which is used to remove the , from the number string to allow Mongo to convert it into a number.

    If you are using a lesser Mongo version you can still achieve the same result you will just have to remove the , in a different method, for example using $split on , as a delimiter and then $concat to reconstruct the string.

    db.collection.updateMany(
    {
      "site": "xyz.com"
    },
    [
      {
        "$set": {
          "Total_reviews": {
            $sum: {
              $map: {
                input: {
                  $objectToArray: "$Reg_NumReviews"
                },
                as: "datum",
                in: {
                  $toLong: {
                    $replaceAll: {
                      input: "$$datum.v",
                      find: ",",
                      replacement: ""
                    }
                  }
                }
              }
            }
          },
          Average_score: {
            $avg: {
              $map: {
                input: {
                  $objectToArray: "$Reg_Score"
                },
                as: "datum",
                in: {
                  $toDouble: "$$datum.v"
                }
              }
            }
          }
        }
      }
    ])
    

    Mongo Playground ---- EDIT -----

    under the assumption that Reg_score and Reg_NumReviews have the same number of keys you can use $zip to calculate your last requirement, like so:

    db.collection.updateMany(
    {
      "site": "xyz.com"
    },
    [
      {
        $set: {
          tmpReviews: {
            $map: {
              input: {
                $objectToArray: "$Reg_NumReviews"
              },
              as: "datum",
              in: {
                $toLong: {
                  $replaceAll: {
                    input: "$$datum.v",
                    find: ",",
                    replacement: ""
                  }
                }
              }
            }
          },
          tmpScores: {
            $map: {
              input: {
                $objectToArray: "$Reg_Score"
              },
              as: "datum",
              in: {
                $toDouble: "$$datum.v"
              }
            }
          }
        }
      },
      {
        "$set": {
          "Total_reviews": {
            $sum: "$tmpReviews"
          },
          Average_score: {
            $avg: "$tmpScores"
          },
          New_complicated_field: {
            $divide: [
              {
                $sum: {
                  $map: {
                    input: {
                      $zip: {
                        inputs: [
                          "$tmpReviews",
                          "$tmpScores"
                        ]
                      }
                    },
                    as: "zipped",
                    in: {
                      $multiply: [
                        {
                          $arrayElemAt: [
                            "$$zipped",
                            0
                          ]
                        },
                        {
                          $arrayElemAt: [
                            "$$zipped",
                            1
                          ]
                        }
                      ]
                    }
                  }
                }
              },
              {
                $sum: "$tmpReviews"
              }
            ]
          }
        }
      },
      {
        $unset: {
          tmpReviews: 1,
          tmpScores: 1
        }
      }
    ])
    

    Mongo Playground