need your help with indexing...
I have documents that contain the below data structure - (several documents in an array).
[
{
"Traits" : {
"Meters" : [
{
"Type" : "Device",
"Name" : "playerCashableAmt",
"Value" : NumberInt(2052846000),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(1)
},
{
"Type" : "Device",
"Name" : "playerPromoAmt",
"Value" : NumberInt(0),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(1)
},
{
"Type" : "Device",
"Name" : "playerNonCashAmt",
"Value" : NumberInt(0),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(1)
},
{
"Type" : "Device",
"Name" : "wageredCashableAmt",
"Value" : NumberInt(251680000),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(1)
},
{
"Type" : "Device",
"Name" : "wageredPromoAmt",
"Value" : NumberInt(0),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(1)
},
{
"Type" : "Device",
"Name" : "wageredNonCashAmt",
"Value" : NumberInt(0),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(1)
},
{
"Type" : "Device",
"Name" : "cardedWageredCashableAmt",
"Value" : NumberInt(0),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(1)
},
{
"Type" : "Device",
"Name" : "cardedWageredPromoAmt",
"Value" : NumberInt(0),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(1)
},
{
"Type" : "Device",
"Name" : "cardedWageredNonCashAmt",
"Value" : NumberInt(0),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(1)
},
{
"Type" : "Device",
"Name" : "playerCashableAmt",
"Value" : NumberInt(2052846000),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(0)
},
{
"Type" : "Device",
"Name" : "playerPromoAmt",
"Value" : NumberInt(0),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(0)
},
{
"Type" : "Device",
"Name" : "playerNonCashAmt",
"Value" : NumberInt(0),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(0)
},
{
"Type" : "Device",
"Name" : "wageredCashableAmt",
"Value" : NumberInt(251680000),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(0)
},
{
"Type" : "Device",
"Name" : "wageredPromoAmt",
"Value" : NumberInt(0),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(0)
},
{
"Type" : "Device",
"Name" : "wageredNonCashAmt",
"Value" : NumberInt(0),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(0)
},
{
"Type" : "Device",
"Name" : "cardedWageredCashableAmt",
"Value" : NumberInt(0),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(0)
},
{
"Type" : "Device",
"Name" : "cardedWageredPromoAmt",
"Value" : NumberInt(0),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(0)
},
{
"Type" : "Device",
"Name" : "cardedWageredNonCashAmt",
"Value" : NumberInt(0),
"DeviceClass" : "cabinet",
"DeviceId" : NumberInt(0)
},
{
"Type" : "Device",
"Name" : "wageredAmt",
"Value" : NumberInt(128800000),
"DeviceClass" : "gamePlay",
"DeviceId" : NumberInt(3)
},
{
"Type" : "Device",
"Name" : "avgPaybackPct",
"Value" : NumberInt(8991),
"DeviceClass" : "gamePlay",
"DeviceId" : NumberInt(3)
},
{
"Type" : "Device",
"Name" : "theoPaybackAmt",
"Value" : NumberInt(115804080),
"DeviceClass" : "gamePlay",
"DeviceId" : NumberInt(3)
},
{
"Type" : "Device",
"Name" : "wageredAmt",
"Value" : NumberInt(251680000),
"DeviceClass" : "gamePlay",
"DeviceId" : NumberInt(0)
},
{
"Type" : "Device",
"Name" : "avgPaybackPct",
"Value" : NumberInt(8991),
"DeviceClass" : "gamePlay",
"DeviceId" : NumberInt(0)
},
{
"Type" : "Device",
"Name" : "theoPaybackAmt",
"Value" : NumberInt(226285488),
"DeviceClass" : "gamePlay",
"DeviceId" : NumberInt(0)
},
{
"Type" : "Wager",
"WagerCategory" : "01",
"Name" : "wageredAmt",
"Value" : NumberInt(128800000),
"DeviceClass" : "gamePlay",
"DeviceId" : NumberInt(3)
}
]
"Timestamp" : ISODate("2023-04-05T13:18:46.831+0000")
}
]
I am trying to find a specific Name in the array and get its value and Timestamp with the below aggregation query. The position of the "Traits.Meters.Name" that I am trying to find in the array could vary from one message to the other.
db.collection.aggregate(
[
{
"$unwind" : {
"path" : "$Traits.Meters"
}
},
{
"$match" : {
"$expr" : {
"$or" : [
{
"$eq" : [
"$Traits.Meters.Name",
"wageredCashableAmt"
]
},
{
"$eq" : [
"$Traits.Meters.Name",
"wageredNonCashAmt"
]
},
{
"$eq" : [
"$Traits.Meters.Name",
"wageredPromoAmt"
]
}
]
}
}
},
{
$project: {
_id: 0.0,
MeterName: "$Traits.Meters.Name",
WageredAmount: "$Traits.Meters.Value",
Date: {
$dateToString: {
format: "%Y-%m-%d",
date: "$Timestamp",
},
},
},
}
]);
I am able to get the data but it is doing a collection scan of all documents and getting alerts because of it. I tried adding an index on Traits.Meters and another on Traits.Meters.Name, but none of those indexes are getting used. I am getting the below Explain result.
"$cursor" : {
"queryPlanner" : {
"plannerVersion" : NumberInt(1),
"namespace" : "Collection",
"indexFilterSet" : false,
"parsedQuery" : {
},
"queryHash" : "8B3D4AB8",
"planCacheKey" : "8B3D4AB8",
"winningPlan" : {
"stage" : "COLLSCAN",
"direction" : "forward"
},
"rejectedPlans" : [
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : NumberInt(16000),
"executionTimeMillis" : NumberInt(263),
"totalKeysExamined" : NumberInt(0),
"totalDocsExamined" : NumberInt(16000),
"executionStages" : {
"stage" : "COLLSCAN",
"nReturned" : NumberInt(16000),
"executionTimeMillisEstimate" : NumberInt(2),
"works" : NumberInt(16002),
"advanced" : NumberInt(16000),
"needTime" : NumberInt(1),
"needYield" : NumberInt(0),
"saveState" : NumberInt(27),
"restoreState" : NumberInt(27),
"isEOF" : NumberInt(1),
"direction" : "forward",
"docsExamined" : NumberInt(16000)
}
}
},
"nReturned" : NumberLong(16000),
"executionTimeMillisEstimate" : NumberLong(34)
}
Not all documents have Traits.Meters. Any help is greatly appreciated. Thanks.
Try this:
db.collection.aggregate([
{
$match: {
"Traits.Meters.Name": {
$in: [
"playerCashableAmt",
"wageredPromoAmt"
]
}
}
},
{
$project: {
"Traits.Meters": {
"$filter": {
"input": "$Traits.Meters",
"as": "t",
"cond": {
$in: [
"$$t.Name",
[
"playerCashableAmt",
"wageredPromoAmt"
]
]
}
}
}
}
},
{
$unwind: "$Traits.Meters"
},
{
$project: {
_id: 0.0,
MeterName: "$Traits.Meters.Name",
WageredAmount: "$Traits.Meters.Value",
Date: {
$dateToString: {
format: "%Y-%m-%d",
date: "$Timestamp",
},
},
},
}
])
Explained: Your query do not use the index because your first stage is unwind , using the match stage first will ensure your query is using the index.
Prerequisite: Make sure you have index on "Traits.Meters.Name" field.
Remark: Avoid using $unwind stage at all cost unless it is absolutely necessary , $unwind stage in a big collection is expensive and not scalable option. You can achieve most of your needs with $map , $reduce or $filter stages without the help of unwind ...