How to use MongoDB aggregation query to obtain range of documents based on filtered status?
Consider the following example data:
[
{
"visible_id": 1,
"status": "INITIAL",
"name": "ab"
},
{
"visible_id": 2,
"status": "INITIAL",
"name": "aab"
},
{
"visible_id": 3,
"status": "SCANNED",
"name": "bba"
},
{
"visible_id": 4,
"status": "SCANNED",
"name": "cca"
},
{
"visible_id": 5,
"status": "INITIAL",
"name": "dds"
},
{
"visible_id": 6,
"status": "INITIAL",
"name": "aax"
},
{
"visible_id": 7,
"status": "INITIAL",
"name": "bsd"
},
{
"visible_id": 8,
"status": "PRINTED",
"name": "ads"
}
]
The aggregated query should list me all the available sticker in the filter status in a range value of visible_id. If a different status is found in between, a new range must be projected from the next filter status onward appended to the result list.
Example Filter and Results according to the above CSV data: if filter status = "INITIAL" result:
[
{status: "INITIAL", range: "1-2"},
{status: "INITIAL", range: "5-7"}
]
if filter status = "SCANNED" result:
[
{status: "SCANNED", range: "3-4"},
]
if filter status = "PRINTED" result:
[
{status: "PRINTED", range: "8-8"},
]
Try this one:
db.collection.aggregate([
{ // Get the status from previous document
$setWindowFields: {
sortBy: { visible_id: 1 },
output: {
lag: { $shift: { output: "$status", by: -1 } }
}
}
},
{
$set: { // Check if status is different to previous status
group: {
$cond: {
if: { $eq: ["$status", "$lag"] },
then: "$$REMOVE",
else: "$visible_id"
}
}
}
},
{
$fill: { // Fill missing values with "last observation carried forward"
sortBy: { visible_id: 1 },
output: { group: { method: "locf" } }
}
},
{ $match: { status: "SCANNED" } }, // Limit the output
{
$group: { // Get min and max values
_id: "$group",
min: { $min: "$visible_id" },
max: { $max: "$visible_id" },
status: { $first: "$status" }
}
},
{
$project: { // some output cosmetics
_id: 0,
status: 1,
range: { $concat: [{ $toString: "$min" }, "-", { $toString: "$max" }] }
}
}
])
In case visible_id
is a strict sequence and you have an index on status
, then this version would be much faster:
db.collection.aggregate([
{ $match: { status: "INITIAL" } },
{
$setWindowFields: {
sortBy: { visible_id: 1 },
output: {
lag: { $shift: { output: "$visible_id", by: -1 } }
}
}
},
{
$set: {
group: {
$cond: {
if: { $eq: [{$subtract: ["$visible_id",1]}, "$lag"] },
then: "$$REMOVE",
else: "$visible_id"
}
}
}
},
{
$fill: {
sortBy: { visible_id: 1 },
output: { group: { method: "locf" } }
}
},
{
$group: {
_id: "$group",
min: { $min: "$visible_id" },
max: { $max: "$visible_id" },
status: { $first: "$status" }
}
},
{
$project: {
_id: 0,
status: 1,
range: { $concat: [{ $toString: "$min" }, "-", { $toString: "$max" }] }
}
}
])