Search code examples
javascriptmongodbmongodb-queryaggregation-frameworklookup

Draft an aggregate to group based on a nested attribute with lookup


There are these two collections as listed below

orders: [
  {
    "_id": "64355c928dcce8cdf4b9c7d2",
    "destinations": [
      {
        "ship_to_id": "64355c92af10d37993473e12", // mongoose id from locations collection
        "sold_to_id": "64355c92a57d8b29412a1cc2" // mongoose id from locations collection
      },
      {
        "ship_to_id": "64355c92a57d8b29412a1cc2",
        "sold_to_id": "64355c92ed8af3f7cd7199b2"
      },
      {
        "ship_to_id": "64355c92256aa652e6c3fdc5",
        "sold_to_id": "64355c924f9a2fcafa90daed"
      }
    ],
    "contact_details": [
      {
        "ship_to_ref": "aaa",
        "sold_to_ref": "bbb",
        "contact_email": "a@gmail.com",
      },
      {
        "ship_to_ref": "bbb",
        "sold_to_ref": "ccc",
        "contact_email": "b@gmail.com"
      },
      {
        "ship_to_ref": "ddd",
        "sold_to_ref": "eee",
        "contact_email": "c@gmail.com",
      }
    ]
  },
  {
    "_id": "64355c92bf25e54cf901be39",
    "destinations": [
      {
        "ship_to_id": "64355c92af10d37993473e12",
        "sold_to_id": "64355c92a57d8b29412a1cc2"
      },
      {
        "ship_to_id": "64355c92a57d8b29412a1cc2",
        "sold_to_id": "64355c92ed8af3f7cd7199b2"
      },
      {
        "ship_to_id": "64355c92af10d37993473e12",
        "sold_to_id": "64355c92256aa652e6c3fdc5"
      }
    ],
    "contact_details": [
      {
        "ship_to_ref": "aaa",
        "sold_to_ref": "bbb",
        "contact_email": "a@gmail.com",
      },
      {
        "ship_to_ref": "bbb",
        "sold_to_ref": "ccc",
        "contact_email": "b@gmail.com",
      },
      {
        "ship_to_ref": "aaa",
        "sold_to_ref": "ddd",
        "contact_email": "d@gmail.com",
      }
    ]
  },
  {
    "_id": "64355c921445785f4b50040e",
    "destinations": [
      {
        "ship_to_id": "64355c92af10d37993473e12",
        "sold_to_id": "64355c92a57d8b29412a1cc2"
      },
      {
        "ship_to_id": "64355c92af10d37993473e12",
        "sold_to_id": "64355c92ed8af3f7cd7199b2"
      }
    ],
    "contact_details": [
      {
        "ship_to_ref": "aaa",
        "sold_to_ref": "bbb",
        "contact_email": "a@gmail.com",
      },
      {
        "ship_to_ref": "aaa",
        "sold_to_ref": "ccc",
        "contact_email": "e@gmail.com",
      },
    ]
  }
]

and

locations: [
  {
    "_id": "64355c92af10d37993473e12",
    "reference_id": "aaa"
  },
  {
    "_id": "64355c92a57d8b29412a1cc2",
    "reference_id": "bbb"
  },
  {
    "_id": "64355c92ed8af3f7cd7199b2",
    "reference_id": "ccc"
  },
  {
    "_id": "64355c92256aa652e6c3fdc5",
    "reference_id": "ddd"
  },
  {
    "_id": "64355c924f9a2fcafa90daed",
    "reference_id": "eee"
  },
]

Now what I want to achieve is write an aggregate query that groups me all the indents based on same ship_to, sold_to and contact_email (Note: to verify if thats a valid ship_to and sold_to we need to lookup from the locations collection using the ship_to_id and sold_to_id, retrieve the respective reference_id 's and then compare it with the ship_to_ref and sold_to_ref that is available in contact details) such that the query result will be like

[{
  _id: {
    ship_to_ref: "aaa",
    sold_to_ref: "bbb",
    ship_to_id:"64355c92af10d37993473e12"
    sold_to_id:"64355c92a57d8b29412a1cc2"
    contact_email: "a@gmail.com"
   },
   orders: [
     { "_id": "64355c928dcce8cdf4b9c7d2", ... },
     { "_id": "64355c92bf25e54cf901be39", ... },
     { "_id" : "64355c921445785f4b50040e", ...}
   ],
 },
 {
  _id: {
    ship_to_ref: "bbb",
    sold_to_ref: "ccc",
    ship_to_id:"64355c92a57d8b29412a1cc2",
    sold_to_id:"64355c92ed8af3f7cd7199b2",
    contact_email: "b@gmail.com"
   },
   orders: [
     { "_id": "64355c928dcce8cdf4b9c7d2", ... },
     { "_id": "64355c92bf25e54cf901be39", ... },
   ],
 },
 {
  _id: {
    ship_to_ref: "ddd",
    sold_to_ref: "eee",
    ship_to_id: "64355c92256aa652e6c3fdc5",
    sold_to_id: "64355c924f9a2fcafa90daed",
    "contact_email": "c@gmail.com",
   },
   orders: [
     { "_id": "64355c928dcce8cdf4b9c7d2", ... }
   ],
 },
{
  _id: {
    ship_to_ref: "aaa",
    sold_to_ref: "ddd",
    ship_to_id: "64355c92af10d37993473e12",
    sold_to_id: "64355c92256aa652e6c3fdc5",
    contact_email: "d@gmail.com",
   },
   orders: [
     { "_id": "64355c92bf25e54cf901be39", ... }
   ],
 },
{
  _id: {
    ship_to_ref: "aaa",
    sold_to_ref: "ccc",
    ship_to_id: "64355c92af10d37993473e12",
    sold_to_id: "64355c92ed8af3f7cd7199b2",
    contact_email: "e@gmail.com",
   },
   orders: [
     { "_id": "64355c921445785f4b50040e", ... }
   ],
 },
]

Below is an aggregate query that I tried

[{
  $match: {
    contact_details: { $exists: true },
  },
},
{ $unwind: '$destinations' },
{ $unwind: '$contact_details'},
{
  $group: {
    _id: {
      sold_to_id: '$destinations.sold_to_id',
      ship_to_id: '$destinations.ship_to_id',
    }
    orders: {
      $push: {
        id: '$order_no', // order_no is an attribute from an order object
        contact_details: '$contact_details',
        destinations: '$destinations', // should contain all the destinations that were there initially inside an order entry
      },
    },
    type: { $first: { type: '$destinations.type' } },
  },
}]

but not sure how to incorporate a lookup and do the ref comparison and email check. Need help.

Thanks in advance


Solution

  • One option is to $map before you unwind, so you can only unwind once and use $indexOfArray in order to match the references after the $lookup:

    db.orders.aggregate([
      {$match: {contact_details: {$exists: true}}},
      {$project: {
          del: {$map: {
              input: {$range: [0, {$size: "$contact_details"}]},
              in: {
                destination: {$arrayElemAt: ["$destinations", "$$this"]},
                contact: {$arrayElemAt: ["$contact_details", "$$this"]},
                destinations: "$destinations"
              }
          }}
      }},
      {$unwind: "$del"},
      {$group: {
          _id: {
            sold_to_id: "$del.destination.sold_to_id",
            ship_to_id: "$del.destination.ship_to_id"
          },
          contact_email: {$first: "$del.contact.contact_email"},
          orders: {$push: {
              _id: "$_id", 
              contact_details: "$del.contact",
              destinations: "$del.destinations"
          }}
      }},
      {$set: {locations: ["$_id.sold_to_id", "$_id.ship_to_id"]}},
      {$lookup: {
          from: "locations",
          localField: "locations",
          foreignField: "_id",
          as: "locations"
      }},
      {$project: {
          "_id.ship_to_id": "$_id.ship_to_id",
          "_id.sold_to_id": "$_id.sold_to_id",
          "_id.contact_email": "$contact_email",
          "_id.ship_to_ref": {$getField: {
              input: {$arrayElemAt: [
                  "$locations",
                  {$indexOfArray: ["$locations._id", "$_id.ship_to_id"]}
              ]},
              field: "reference_id"
          }},
          "_id.sold_to_ref": {$getField: {
              input: {$arrayElemAt: [
                  "$locations",
                  {$indexOfArray: ["$locations._id", "$_id.sold_to_id"]}
              ]},
              field: "reference_id"
          }},
          orders: 1
      }}
    ])
    

    See how it works on the playground example