I would like to extract from the collection the IDs of documents that have duplicate IDs of "drives" objects that are nested in the array that is in "streetModel".
This is my typical document :
{
"_id": {
"$oid": "61375bec4fa522001b608568"
},
"name": "Streetz",
"statusDetail": {},
"streetModel": {
"_id": "3.7389-51.0566",
"name": "Kosheen - Darude - Swedish - Trynidad - Maui",
"countryCode": "DEN",
"drives": [{
"_id": -903500698,
"direction": "WEST"
}, {
"_id": 1915399546,
"direction": "EAST"
}, {
"_id": 1294835467,
"direction": "NORTH"
}, {
"_id": 1248969937,
"direction": "EAST"
}, {
"_id": 1248969937,
"direction": "EAST"
}, {
"_id": 1492411786,
"direction": "SOUTH"
}]
},
"createdAt": {
"$date": "2021-09-07T12:32:44.238Z"
}
}
In this particular document with the ID 61375bec4fa522001b608568, in "streetModel", in "drives" array I have got duplicated drives objects with id 1248969937.
I would like to create a query to the database that will return the ID of all documents with such a problem (duplicate "drives").
Right now I have got this:
db.streets.aggregate([
{
$unwind: "$streetModel"
},
{
$unwind: "$drives"
},
{
$group: {
_id: {
id: "$_id"
},
sum: {
$sum: 1
},
}
},
{
$match: {
sum: {
$gt: 1
}
}
},
{
$project: {
_id: "$_id._id",
duplicates: {
drives: "$_id"
}
}
}
])
but that's not it.
I try in many ways to rewrite this query, but unfortunately it doesn't work.
Query
db.collection.aggregate([
{
"$unwind": {
"path": "$streetModel.drives"
}
},
{
"$group": {
"_id": {
"docid": "$_id",
"driveid": "$streetModel.drives._id"
},
"duplicates": {
"$push": "$streetModel.drives.direction"
}
}
},
{
"$match": {
"$expr": {
"$gt": [
{
"$size": "$duplicates"
},
1
]
}
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$_id",
"$$ROOT"
]
}
}
},
{
"$project": {
"_id": 0
}
}
])