Search code examples
pythonmongodbaggregation-frameworkpymongo

mongodb get unique values from array from the same document when aggreagting


I'm querying to retrieve unique array of objects from a nested array document, so I'm using aggregation and $group in it.

My Data Structure

{
   _id: ObjectId('637b22639356492ae41bbe23'),
   studentAgeCategories: [
       {
          competitionsType: [
           {
                    competitionsTypeId: ObjectId("5fec0c53a534c297c6b4c5c3"),
                    competitionsTypeName: "Animation/Documentary/Film"
            }
          ]
       }
   ]
}

Tried Query

{'$match': {'_id': ObjectId('637b22639356492ae41bbe23')}},
{'$unwind': '$studentAgeCategories'},
{'$group': {
    '_id': '$studentAgeCategories.competitionsType.competitionsTypeId',
    'competitionType': {
        '$push': {
            '_id': '$studentAgeCategories.competitionsType.competitionsTypeId',
            'competitionsTypeName': '$studentAgeCategories.competitionsType.competitionsTypeName'
            }
        }
    }
}

dbconn.clnEvents.aggregate(pipeline)

This is the result.

/* 1 */
{
    "_id" : [
        ObjectId("5fec0c53a534c297c6b4c5c3"),
        ObjectId("5fec0c65a534c297c6b4ce3a"),
        ObjectId("5fec0c8aa534c297c6b4dda7")
    ],
    "competitionType" : [
        {
            "_id" : [
                ObjectId("5fec0c53a534c297c6b4c5c3"),
                ObjectId("5fec0c65a534c297c6b4ce3a"),
                ObjectId("5fec0c8aa534c297c6b4dda7")
            ],
            "competitionsTypeName" : [ "Animation/Documentary/Film", "Visual coding", "Websites/Web/Mobile Apps" ]
        }
    ]
},

/* 2 */
{
    "_id" : [
        ObjectId("5ff457b2add6eab7491fff13")
    ],
    "competitionType" : [
        {
            "_id" : [
                ObjectId("5ff457b2add6eab7491fff13")
            ],
            "competitionsTypeName" : [ "Presentation" ]
        }
    ]
},

/* 3 */
{
    "_id" : [
        ObjectId("5fec0c9fa534c297c6b4e4b6")
    ],
    "competitionType" : [
        {
            "_id" : [
                ObjectId("5fec0c9fa534c297c6b4e4b6")
            ],
            "competitionsTypeName" : [ "AI/Robotics/IoT" ]
        }
    ]
},

/* 4 */
{
    "_id" : [
        ObjectId("60d8843f6ac43b6179d3bd7e"),
        ObjectId("5ff457b2add6eab7491fff13"),
        ObjectId("5fec0c53a534c297c6b4c5c3")
    ],
    "competitionType" : [
        {
            "_id" : [
                ObjectId("60d8843f6ac43b6179d3bd7e"),
                ObjectId("5ff457b2add6eab7491fff13"),
                ObjectId("5fec0c53a534c297c6b4c5c3")
            ],
            "competitionsTypeName" : [ "Programming Competition", "Presentation", "Animation/Documentary/Film" ]
        }
    ]
},

/* 5 */
{
    "_id" : [
        ObjectId("60d8843f6ac43b6179d3bd7e")
    ],
    "competitionType" : [
        {
            "_id" : [
                ObjectId("60d8843f6ac43b6179d3bd7e")
            ],
            "competitionsTypeName" : [ "Programming Competition" ]
        }
    ]
}

Instead, I need to get something like this.


"competitionType" : [
    
{"_id" :ObjectId("5fec0c9fa534c297c6b4e4b6"),
 "competitionsTypeName" : "AI/Robotics/IoT" },

{"_id" :ObjectId("5fec0c9fa534c297c6b4e4b6"),
 "competitionsTypeName" : "AI/Robotics/IoT" },

{"_id" :ObjectId("5fec0c9fa534c297c6b4e4b6"),
 "competitionsTypeName" : "AI/Robotics/IoT" }]  
]

After going through some of the mongodb articles and forums, I got lost and now I don't know what I'm missing here.


Solution

  • You will need to unwind both studentAgeCategories and competitionsType

    db.clnEvents.aggregate([
    
    {
        $match: {
            _id: ObjectId("62c3e2c03c004872845b2766")
        }
    }, 
    
    {
        $unwind: '$studentAgeCategories'
    }, 
    
    {
        $unwind: '$studentAgeCategories.competitionsType'
    }, 
    
    
    {
        $project: {
            competitionsTypeId: '$studentAgeCategories.competitionsType.competitionsTypeId',
            competitionsTypeName: '$studentAgeCategories.competitionsType.competitionsTypeName'
        }
    },
    
    {
        $group: {
            _id: '$competitionsTypeId',
            competitionsTypeName: {
                $first: '$competitionsTypeName'
            }
        }
    }
        
    ])
    

    Use $group to remove duplicates