Search code examples
mongodbindexingaggregation

MongoDB - How to properly index array with embedded documents


need your help with indexing...

I have documents that contain the below data structure - (several documents in an array).

[
  {
    "Traits" : {
        "Meters" : [
            {
                "Type" : "Device",
                "Name" : "playerCashableAmt",
                "Value" : NumberInt(2052846000),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(1)
            },
            {
                "Type" : "Device",
                "Name" : "playerPromoAmt",
                "Value" : NumberInt(0),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(1)
            },
            {
                "Type" : "Device",
                "Name" : "playerNonCashAmt",
                "Value" : NumberInt(0),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(1)
            },
            {
                "Type" : "Device",
                "Name" : "wageredCashableAmt",
                "Value" : NumberInt(251680000),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(1)
            },
            {
                "Type" : "Device",
                "Name" : "wageredPromoAmt",
                "Value" : NumberInt(0),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(1)
            },
            {
                "Type" : "Device",
                "Name" : "wageredNonCashAmt",
                "Value" : NumberInt(0),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(1)
            },
            {
                "Type" : "Device",
                "Name" : "cardedWageredCashableAmt",
                "Value" : NumberInt(0),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(1)
            },
            {
                "Type" : "Device",
                "Name" : "cardedWageredPromoAmt",
                "Value" : NumberInt(0),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(1)
            },
            {
                "Type" : "Device",
                "Name" : "cardedWageredNonCashAmt",
                "Value" : NumberInt(0),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(1)
            },
            {
                "Type" : "Device",
                "Name" : "playerCashableAmt",
                "Value" : NumberInt(2052846000),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(0)
            },
            {
                "Type" : "Device",
                "Name" : "playerPromoAmt",
                "Value" : NumberInt(0),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(0)
            },
            {
                "Type" : "Device",
                "Name" : "playerNonCashAmt",
                "Value" : NumberInt(0),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(0)
            },
            {
                "Type" : "Device",
                "Name" : "wageredCashableAmt",
                "Value" : NumberInt(251680000),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(0)
            },
            {
                "Type" : "Device",
                "Name" : "wageredPromoAmt",
                "Value" : NumberInt(0),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(0)
            },
            {
                "Type" : "Device",
                "Name" : "wageredNonCashAmt",
                "Value" : NumberInt(0),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(0)
            },
            {
                "Type" : "Device",
                "Name" : "cardedWageredCashableAmt",
                "Value" : NumberInt(0),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(0)
            },
            {
                "Type" : "Device",
                "Name" : "cardedWageredPromoAmt",
                "Value" : NumberInt(0),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(0)
            },
            {
                "Type" : "Device",
                "Name" : "cardedWageredNonCashAmt",
                "Value" : NumberInt(0),
                "DeviceClass" : "cabinet",
                "DeviceId" : NumberInt(0)
            },
            {
                "Type" : "Device",
                "Name" : "wageredAmt",
                "Value" : NumberInt(128800000),
                "DeviceClass" : "gamePlay",
                "DeviceId" : NumberInt(3)
            },
            {
                "Type" : "Device",
                "Name" : "avgPaybackPct",
                "Value" : NumberInt(8991),
                "DeviceClass" : "gamePlay",
                "DeviceId" : NumberInt(3)
            },
            {
                "Type" : "Device",
                "Name" : "theoPaybackAmt",
                "Value" : NumberInt(115804080),
                "DeviceClass" : "gamePlay",
                "DeviceId" : NumberInt(3)
            },
            {
                "Type" : "Device",
                "Name" : "wageredAmt",
                "Value" : NumberInt(251680000),
                "DeviceClass" : "gamePlay",
                "DeviceId" : NumberInt(0)
            },
            {
                "Type" : "Device",
                "Name" : "avgPaybackPct",
                "Value" : NumberInt(8991),
                "DeviceClass" : "gamePlay",
                "DeviceId" : NumberInt(0)
            },
            {
                "Type" : "Device",
                "Name" : "theoPaybackAmt",
                "Value" : NumberInt(226285488),
                "DeviceClass" : "gamePlay",
                "DeviceId" : NumberInt(0)
            },
            {
                "Type" : "Wager",
                "WagerCategory" : "01",
                "Name" : "wageredAmt",
                "Value" : NumberInt(128800000),
                "DeviceClass" : "gamePlay",
                "DeviceId" : NumberInt(3)
            }
        ]
    "Timestamp" : ISODate("2023-04-05T13:18:46.831+0000")
  }
]

I am trying to find a specific Name in the array and get its value and Timestamp with the below aggregation query. The position of the "Traits.Meters.Name" that I am trying to find in the array could vary from one message to the other.

db.collection.aggregate(
    [
        {
            "$unwind" : {
                "path" : "$Traits.Meters"
            }
        }, 
        {
            "$match" : {
                "$expr" : {
                    "$or" : [
                        {
                            "$eq" : [
                                "$Traits.Meters.Name",
                                "wageredCashableAmt"
                            ]
                        },
                        {
                            "$eq" : [
                                "$Traits.Meters.Name",
                                "wageredNonCashAmt"
                            ]
                        },
                        {
                            "$eq" : [
                                "$Traits.Meters.Name",
                                "wageredPromoAmt"
                            ]
                        }
                    ]
                }
            }
        },
        {
          $project: {
            _id: 0.0,
            MeterName: "$Traits.Meters.Name",
            WageredAmount: "$Traits.Meters.Value",
            Date: {
              $dateToString: {
                format: "%Y-%m-%d",
                date: "$Timestamp",
              },
            },
          },
        }
    ]);

I am able to get the data but it is doing a collection scan of all documents and getting alerts because of it. I tried adding an index on Traits.Meters and another on Traits.Meters.Name, but none of those indexes are getting used. I am getting the below Explain result.

    "$cursor" : {
            "queryPlanner" : {
                "plannerVersion" : NumberInt(1),
                "namespace" : "Collection",
                "indexFilterSet" : false,
                "parsedQuery" : {

                },
                "queryHash" : "8B3D4AB8",
                "planCacheKey" : "8B3D4AB8",
                "winningPlan" : {
                    "stage" : "COLLSCAN",
                    "direction" : "forward"
                },
                "rejectedPlans" : [

                ]
            },
            "executionStats" : {
                "executionSuccess" : true,
                "nReturned" : NumberInt(16000),
                "executionTimeMillis" : NumberInt(263),
                "totalKeysExamined" : NumberInt(0),
                "totalDocsExamined" : NumberInt(16000),
                "executionStages" : {
                    "stage" : "COLLSCAN",
                    "nReturned" : NumberInt(16000),
                    "executionTimeMillisEstimate" : NumberInt(2),
                    "works" : NumberInt(16002),
                    "advanced" : NumberInt(16000),
                    "needTime" : NumberInt(1),
                    "needYield" : NumberInt(0),
                    "saveState" : NumberInt(27),
                    "restoreState" : NumberInt(27),
                    "isEOF" : NumberInt(1),
                    "direction" : "forward",
                    "docsExamined" : NumberInt(16000)
                }
            }
        },
        "nReturned" : NumberLong(16000),
        "executionTimeMillisEstimate" : NumberLong(34)
    }

Not all documents have Traits.Meters. Any help is greatly appreciated. Thanks.


Solution

  • Try this:

    db.collection.aggregate([
     {
      $match: {
      "Traits.Meters.Name": {
        $in: [
          "playerCashableAmt",
          "wageredPromoAmt"
         ]
        }
       }
      },
     {
      $project: {
      "Traits.Meters": {
        "$filter": {
          "input": "$Traits.Meters",
          "as": "t",
          "cond": {
            $in: [
              "$$t.Name",
              [
                "playerCashableAmt",
                "wageredPromoAmt"
              ]
            ]
          }
         }
        }
       }
      },
     {
       $unwind: "$Traits.Meters"
     },
     {
     $project: {
      _id: 0.0,
      MeterName: "$Traits.Meters.Name",
      WageredAmount: "$Traits.Meters.Value",
      Date: {
        $dateToString: {
          format: "%Y-%m-%d",
          date: "$Timestamp",
          
        },
        
       },
      
     },
    
    }
    ])
    

    Explained: Your query do not use the index because your first stage is unwind , using the match stage first will ensure your query is using the index.

    Prerequisite: Make sure you have index on "Traits.Meters.Name" field.

    1. Match all documents having Name in any of the elements in the $in:[] operations ( same like OR )
    2. Project only those sub-array elements having Name $in:[] ( same can be achieved with OR operation )
    3. $unwind array if needed. ( now result it is shorter after the match and project stages so unwinding will be easier )
    4. Project only needed fields. ( and supporess the unnecessary fields to reduce result size for better performance )

    Remark: Avoid using $unwind stage at all cost unless it is absolutely necessary , $unwind stage in a big collection is expensive and not scalable option. You can achieve most of your needs with $map , $reduce or $filter stages without the help of unwind ...

    Playground