Search code examples
mongodbmongodb-queryaggregation-frameworkmongodb-java

Filter output from multiple collections


Suppose we have an array of scrips as such:

 ["xxxxx1","xxxxx2","xxxxx3","xxxxx4"] 

We have the following collections of scrips existing in db:

Executed:{"_id" : ObjectId("xxx"),"scrip" : "xxxxx1" },{"_id" : ObjectId("xxy"),"scrip" : "xxxxx3" }......
In-process:{"_id" : ObjectId("xxx"),"scrip" : "xxxxx4" }, ....
Rejected:{"_id" : ObjectId("xxx"),"scrip" : "xxxxx5" }....

We would like get the array of scrips not present in any of the collections above. Expected output:

["xxxxx2"]

How can this be done in a single mongodb pipeline/query?


Solution

  • The following query can get us the expected output:

    db.executed.aggregate([
        {
            $group:{
                "_id":null,
                "executedScrips":{
                    $addToSet:"$scrip"
                }
            }
        },
        {
            $lookup:{
                "from":"inprocess",
                "pipeline":[
                    {
                        $group:{
                            "_id":null,
                            "inprocessScrips":{
                                $addToSet:"$scrip"
                            }
                        }
                    }
                ],
                "as":"inprocessLookup"
            }
        },
        {
            $lookup:{
                "from":"rejected",
                "pipeline":[
                    {
                        $group:{
                            "_id":null,
                            "rejectedScrips":{
                                $addToSet:"$scrip"
                            }
                        }
                    }
                ],
                "as":"rejectedLookup"
            }
        },
        {
            $unwind:{
                "path":"$inprocessLookup",
                "preserveNullAndEmptyArrays":true
            }
        },
        {
            $unwind:{
                "path":"$rejectedLookup",
                "preserveNullAndEmptyArrays":true
            }
        },
        {
            $project:{  
                "scrips":{
                    $concatArrays:[
                        "$executedScrips", 
                        {
                            $ifNull:["$inprocessLookup.inprocessScrips",[]]
                        },
                        {
                            $ifNull:["$rejectedLookup.rejectedScrips",[]]
                        }
                    ]
                }
            }
        },
        {
            $project:{
                "_id":0,
                "notFound":{
                    $setDifference:[["xxxxx1","xxxxx2","xxxxx3","xxxxx4"],"$scrips"]
                }
            }
        }
    ]).pretty()
    

    Data Set:

    Collection: executed

        { "_id" : ObjectId("5d60e572f00e0c8c3593b5ff"), "scrip" : "xxxxx1" }
        { "_id" : ObjectId("5d60e572f00e0c8c3593b600"), "scrip" : "xxxxx3" }
    

    Collection: inprocess

        { "_id" : ObjectId("5d60f23ff00e0c8c3593b601"), "scrip" : "xxxxx4" }
    

    Collection: rejected

        { "_id" : ObjectId("5d60f260f00e0c8c3593b602"), "scrip" : "xxxxx5" }
    

    Output:

    { "notFound" : [ "xxxxx2" ] }
    

    Note: The query will fail if there are no records in executed collection as the aggregation starts from there.


    Update I: The array of requests would be passed instead of an array of scrips

    The following query can get us the expected output:

    db.executed.aggregate([
        {
            $group:{
                "_id":null,
                "executedScrips":{
                    $addToSet:"$scrip"
                }
            }
        },
        {
            $lookup:{
                "from":"inprocess",
                "pipeline":[
                    {
                        $group:{
                            "_id":null,
                            "inprocessScrips":{
                                $addToSet:"$scrip"
                            }
                        }
                    }
                ],
                "as":"inprocessLookup"
            }
        },
        {
            $lookup:{
                "from":"rejected",
                "pipeline":[
                    {
                        $group:{
                            "_id":null,
                            "rejectedScrips":{
                                $addToSet:"$scrip"
                            }
                        }
                    }
                ],
                "as":"rejectedLookup"
            }
        },
        {
            $unwind:{
                "path":"$inprocessLookup",
                "preserveNullAndEmptyArrays":true
            }
        },
        {
            $unwind:{
                "path":"$rejectedLookup",
                "preserveNullAndEmptyArrays":true
            }
        },
        {
            $project:{  
                "scrips":{
                    $concatArrays:[
                        "$executedScrips", 
                        {
                            $ifNull:["$inprocessLookup.inprocessScrips",[]]
                        },
                        {
                            $ifNull:["$rejectedLookup.rejectedScrips",[]]
                        }
                    ]
                }
            }
        },
        {
            $addFields:{
                "requests":[
                    {
                        "requestid" : "R1", 
                        "stocks" : ["xxxxx1","xxxxx2","xxxxx3","xxxxx4"]
                    },
                    {
                        "requestid" : "R2", 
                        "stocks" : ["xxxxx1","xxxxx3","xxxxx4"]
                    },
                    {
                        "requestid" : "R3", 
                        "stocks" : ["xxxxx1","xxxxx3","xxxxx4","xxxxx10"]
                    }
                ]
            }
        },
        {
            $project:{
                "_id":0,
                "unmatchedRequests":{
                   $map:{
                        "input":"$requests",
                        "as":"request",
                        "in":{
                            $concat:{
                                $cond:[
                                    {
                                        $gt:[
                                            {
                                                $size:{
                                                    $setDifference:["$$request.stocks","$scrips"]
                                                }
                                            },
                                            0
                                        ]
                                    },
                                    "$$request.requestid",
                                    null
                                ]
                            }
                        }
                   }
                }
            }
        },
        {
            $project:{
                "unmatchedRequests":{
                    $filter:{
                        "input":"$unmatchedRequests",
                        "as":"unmatchedRequest",
                        "cond":{
                            $ne:["$$unmatchedRequest",null]
                        }
                    }
                }
            }
        }
    ]).pretty()
    

    Output:

    { "unmatchedRequests" : [ "R1", "R3" ] }
    

    Note: In the 7th aggregation stage, we are injecting the array of requests.