Search code examples
mongodbaggregation-frameworkpipelinesubdocument

How do I limit/filter entries in a sub document?


Answer is at bottom of my question

I am close to getting what I want... but just a little bit off.

what I want to return is all active kennels and if any kennel has a booking(s), for a specific Year (2024), an array of dayOfYear (eg [100,101,102,103]) and an array of rooms (eg [1,2] )

Here is a Mongo playground

This is what I am getting now

[
        {
            "_id": "65ef79a2331ab6aef4fae5d4",
            "identifier": "1.1.6x10",
            "active": true,
            "partition": "1",
            "length": 10,
            "width": 6,
            "bookings": [
                {
                    "_id": "65ef88f444e7d6607498ac2e",
                    "year": 2024,
                    "dayOfYear": 100,
                    "duration": 6
                },
                {
                    "_id": "65f0ca69f2667460e600a46a",
                    "year": 2024,
                    "dayOfYear": 107,
                    "duration": 1
                }
            ]
        }
]

this is what I would like to get - all kennels, AND the kennel with identifier "1.1.6x10" should show a single booking for 2024 - 100, (for clarity - bookings for any Kennel with a booking on 2024 - 100 should also be returned, but for this example data, only the 1.1.6x10 kennel has a booking)

[
        {
            "_id": "65ef79a2331ab6aef4fae5d4",
            "identifier": "1.1.6x10",
            "active": true,
            "partition": "1",
            "length": 10,
            "width": 6,
            "bookings": [
                {
                    "_id": "65ef88f444e7d6607498ac2e",
                    "year": 2024,
                    "dayOfYear": 100,
                    "duration": 6
                }
            ]
        },
        {
            "_id": "65ef79a2331ab6aef4fae5d5",
            "identifier": "1.2.6x10",
            "active": true,
            "partition": "2",
            "length": 10,
            "width": 6,
            "bookings": []
        },
        {
            "_id": "65ef79a3331ab6aef4fae5e5",
            "identifier": "2.1.4x10",
            "active": true,
            "partition": "1",
            "length": 10,
            "width": 4,
            "bookings": []
        },
        {
            "_id": "65ef79a3331ab6aef4fae5e6",
            "identifier": "2.2.4x10",
            "active": true,
            "partition": "2",
            "length": 10,
            "width": 4,
            "bookings": []
        },
        {
            "_id": "65ef79a3331ab6aef4fae5e7",
            "identifier": "2.3.4x10",
            "active": true,
            "partition": "3",
            "length": 10,
            "width": 4,
            "bookings": []
        }
    ]

Here is the mongodb pipeline I am using, aggregating on 'kennels'

  let pipeline = [
    {
      $lookup: {
        from: 'rooms',
        localField: 'REF_RoomID',
        foreignField: '_id',
        as: 'room',
      },
    },
    {
      $unwind: {
        path: '$room',
        preserveNullAndEmptyArrays: false,
      },
    },
    {
      $lookup: {
        from: 'bookings',
        localField: '_id',
        foreignField: 'REF_KennelID',
        as: 'bookings',
      },
    },
    {
      $match: {
        $and: [
          { active: true },
          { 'room.number': { $in: [1,2] } },
          //
          // this is my problem area
          //{ 'bookings.year': { $eq: 2024 } },
          //{ 'bookings.dayOfYear': { $eq: 100 } },
        ],
      },
    },
    {
      $project: {
        REF_RoomID: 0,
        room: 0,
        'bookings.REF_KennelID': 0,
        'bookings.__v': 0,
      },
    },
  ];

Here is some data:

Rooms

{
  "_id":  "65ef799f331ab6aef4fae5ba",
  "number": "1",
  "length": "10",
  "width": "12"
},
{
  "_id": "65ef79a0331ab6aef4fae5be",
  "number": "2",
  "length": "10",
  "width": "12"
}

Kennels

{
  "_id": "65ef79a2331ab6aef4fae5d4",
  "REF_RoomID":  "65ef799f331ab6aef4fae5ba",
  "identifier": "1.1.6x10",
  "active": true,
  "partition": "1",
  "length": 10,
  "width": 6
},
{
  "_id": "65ef79a2331ab6aef4fae5d5",
  "REF_RoomID": "65ef799f331ab6aef4fae5ba",
  "identifier": "1.2.6x10",
  "active": true,
  "partition": "2",
  "length": 10,
  "width": 6
},
{
  "_id": "65ef79a3331ab6aef4fae5e5",
  "REF_RoomID": "65ef79a0331ab6aef4fae5be",
  "identifier": "2.1.4x10",
  "active": true,
  "partition": "1",
  "length": 10,
  "width": 4
},
{
  "_id": "65ef79a3331ab6aef4fae5e6",
  "REF_RoomID": "65ef79a0331ab6aef4fae5be",
  "identifier": "2.2.4x10",
  "active": true,
  "partition": "2",
  "length": 10,
  "width": 4
},
{
  "_id": "65ef79a3331ab6aef4fae5e7",
  "REF_RoomID": "65ef79a0331ab6aef4fae5be",
  "identifier": "2.3.4x10",
  "active": true,
  "partition": "3",
  "length": 10,
  "width": 4
}

Bookings

{
  "_id": "65ef88f444e7d6607498ac2e",
  "REF_KennelID": "65ef79a2331ab6aef4fae5d4",
  "year": 2024,
  "dayOfYear": 100,
  "duration": 6
},
{
  "_id": "65f0ca69f2667460e600a46a",
  "REF_KennelID": "65ef79a2331ab6aef4fae5d4",
  "year": 2024,
  "dayOfYear": 107,
  "duration": 1
}

thank you !

ANSWER

here is the pipeline that works - thanks to https://stackoverflow.com/users/1431750/aneroid for pushing me down the right path !

let pipeline = [
    {
      $match: {
        active: true,
      },
    },
    {
      $lookup: {
        from: "rooms",
        localField: "REF_RoomID",
        foreignField: "_id",
        as: "room",
      },
    },
    {
      $match: {
        "room.number": {
          $in: ["1","2"],
        },
      },
    },
    {
      $unwind: {
        path: "$room",
        preserveNullAndEmptyArrays: true,
      },
    },
    {
      $lookup: {
        from: "bookings",
        localField: "_id",
        foreignField: "REF_KennelID",
        pipeline: [
          {
            $match: {
              year: {
                $in: [2024],
              },
              dayOfYear: {
                $in: [100],
              },
            },
          },
        ],
        as: "bookings",
      },
    },
    {
      $unwind: {
        path: "$bookings",
        preserveNullAndEmptyArrays: true,
      },
    },
    {
      $group: {
        _id: "$_id",
        identifier: {
          $first: "$identifier",
        },
        active: {
          $first: "$active",
        },
        partition: {
          $first: "$partition",
        },
        length: {
          $first: "$length",
        },
        width: {
          $first: "$width",
        },
        bookings: {
          $push: "$bookings",
        },
      },
    },
    {
      $project: {
        "bookings.REF_KennelID": 0,
        "bookings.__v": 0,
      },
    },
    {
      $sort: {
        identifier: 1,
      },
    },
  ];

Solution

    1. The $match stage for kennels being active should be first in your pipeline. So the lookups won't be performed for unmatched kennels which improves performance.

    2. In the first $lookup stage, use a lookup-pipeline instead of only field-equality followed by a match for room numbers. This is needed because "I want to return is all active kennels".

      • In the current form, any active kennel which doesn't have room "1" or "2" would not show up in the results at all.
    3. Same goes for the second $lookup stage - use a pipeline to get all kennels regardless of whether they have matching bookings or not, or even any bookings.

    db.kennels.aggregate([
      { $match: { active: true } },
      {
        $lookup: {
          from: "rooms",
          let: { searchRoomID: "$REF_RoomID" },
          pipeline: [
            {
              $match: {
                $expr: {
                  $and: [
                    { $eq: ["$_id", "$$searchRoomID"] },
                    { $in: ["$number", ["1", "2"]] }
                  ]
                }
              }
            }
          ],
          as: "room"
        }
      },
      {
        $unwind: {
          path: "$room",
          preserveNullAndEmptyArrays: true
        }
      },
      {
        $lookup: {
          from: "bookings",
          let: { searchKennelID: "$_id" },
          pipeline: [
            {
              $match: {
                $expr: {
                  $and: [
                    { $eq: ["$REF_KennelID", "$$searchKennelID"] },
                    { $eq: ["$year", 2024] },
                    { $in: ["$dayOfYear", [100, 101, 102, 103]] }
                  ]
                }
              }
            }
          ],
          as: "bookings"
        }
      },
      {
        $unwind: {
          path: "$bookings",
          preserveNullAndEmptyArrays: true
        }
      },
      {
        $group: {
          _id: "$_id",
          identifier: { $first: "$identifier" },
          active: { $first: "$active" },
          partition: { $first: "$partition" },
          length: { $first: "$length" },
          width: { $first: "$width" },
          bookings: { $push: "$bookings" }
        }
      },
      {
        $project: {
          "bookings.REF_KennelID": 0,
          "bookings.__v": 0
        }
      }
    ])
    

    Mongo Playground

    Notes:

    • You probably need some $sort stage at the end or before the $group.
    • It's unclear why you're doing a lookup with rooms at all.
      • You check for room = "1" or "2" but then you remove it from the results, and you're returning all kennels anyway. So a kennel with only room "3" would be in the results regardless of that condition.
      • bookings refer to kennels and not rooms so the room lookup adds no value here. Your links to bookings and rooms are independent of each other and only connected via kennels.
    • If you actually only want kennels which have room "1" or "2", and NOT the full list of kennels, then change the unwind after the room-lookup from preserveNullAndEmptyArrays: true to false. So any kennels which don't have those rooms will not be in the results.