Search code examples
mongodbaggregatelookup

Aggregation lookup in a sub field inside an array


I have the following MongoDB schema for User collection

{
...otherProperties,
skills: [
{
    skill: { type: db.Schema.Types.ObjectId, ref: 'Skill' },
    level: { type: String }
}
]
}

and I am trying to join this collection to the skills collection using aggregation But I wanna keep the other field level and just join each one with its object I've tried the following aggregation

[{$lookup:{ from: 'Skill',localField: 'skills.skill', foreignField: '_id', as: 'skills.skill' }}]

The result I got is the following

{
  "_id": {
    "$oid": "6130f296eff45efaa80cb4e6"
  },
  "skills": {
    "skill": [
      {
        "_id": {
          "$oid": "60d9a0996e5c5f0025f3c882"
        },
        "job_post_enabled": true,
        "job_seeker_enabled": true,
        "job_seeker": "A/R Management",
        "job_post": "A/R Management",
        "__v": 0,
        "translations": {
          "AR": {
            "job_post": "A/R Management",
            "job_seeker": "A/R Management"
          },
          "EN": {
            "job_post": "A/R Management",
            "job_seeker": "A/R Management"
          }
        },
        "created_at": {
          "$date": "2021-06-28T10:12:58.221Z"
        },
        "updated_at": {
          "$date": "2022-01-25T21:52:55.447Z"
        }
      },
      {
        "_id": {
          "$oid": "60d9a0996e5c5f0025f3c886"
        },
        "job_post_enabled": true,
        "job_seeker_enabled": true,
        "job_seeker": "A/V systems",
        "job_post": "A/V systems",
        "__v": 0,
        "translations": {
          "AR": {
            "job_post": "A/V systems",
            "job_seeker": "A/V systems"
          },
          "EN": {
            "job_post": "A/V systems",
            "job_seeker": "A/V systems"
          }
        },
        "created_at": {
          "$date": "2021-06-28T10:12:58.221Z"
        },
        "updated_at": {
          "$date": "2022-01-25T21:52:55.453Z"
        }
      }
    ]
  }
}

Is there a way to join each skill with its own record using aggregation? and keep the fields as they are

My current data

{
    "skills": [
        {
            "skill": ObjectId("60d9a0996e5c5f0025f3c882"),
            "level": "High"
        },
        {
            "skill": ObjectId("60d9a0996e5c5f0025f3c886"),
            "level": "Mid"
        }
    ]
}

The desired output

{
    "skills": [
        {
            "skill":       {
                "_id": {
                  "$oid": "60d9a0996e5c5f0025f3c882"
                },
                "job_post_enabled": true,
                "job_seeker_enabled": true,
                "job_seeker": "A/R Management",
                "job_post": "A/R Management",
                "__v": 0,
                "translations": {
                  "AR": {
                    "job_post": "A/R Management",
                    "job_seeker": "A/R Management"
                  },
                  "EN": {
                    "job_post": "A/R Management",
                    "job_seeker": "A/R Management"
                  }
                },
                "created_at": {
                  "$date": "2021-06-28T10:12:58.221Z"
                },
                "updated_at": {
                  "$date": "2022-01-25T21:52:55.447Z"
                }
              },
              "level": "High"
        },
        {
            "skill": {
                "_id": {
                  "$oid": "60d9a0996e5c5f0025f3c886"
                },
                "job_post_enabled": true,
                "job_seeker_enabled": true,
                "job_seeker": "A/V systems",
                "job_post": "A/V systems",
                "__v": 0,
                "translations": {
                  "AR": {
                    "job_post": "A/V systems",
                    "job_seeker": "A/V systems"
                  },
                  "EN": {
                    "job_post": "A/V systems",
                    "job_seeker": "A/V systems"
                  }
                },
                "created_at": {
                  "$date": "2021-06-28T10:12:58.221Z"
                },
                "updated_at": {
                  "$date": "2022-01-25T21:52:55.453Z"
                }
              },
            "level": "Mid"
        }
    ]
}

Solution

  • Solution with caveat that skill _id should exist in lookup collection; otherwise, the skill _id will not appear in the output i.e. there is no immediate way to tell something is missing.

    c = db.foo.aggregate([
        // Optional match here:                                                                                                    
        //{$match: {_id:0}},                                                                                                       
    
        // Do the basic lookup:                                                                                                    
        {$lookup: {
            from: "Xskills",
            localField: "skills.skill",
            foreignField: "_id",
            as: "zz"
            }}
    
        // zz now contains an array of zero to N matches against the skills                                                        
        // table.  Use $zip to merge the separate skills and detail array fields, e.g.                                                        
        //   {                                                                                                                     
        //     skills: [ {skill:"S1",level:"High"},                                                                                
        //     zz:     [ {job_seeker:"whatever",job_post_enabled:true,...}, {QQQ} ]                                                
        // becomes an array of arrays [ [skill,detail], [skill,detail], ...]                                                                                                               
        //   {                                                                                                                     
        //     skills: [ [ {skill:"S1",level:"High"}, {job_seeker:"whatever",job_post_enabled:true,...} ], [ {skill:"S2",level:"Mid"}, {job_seeker:"whatever",job_post_enabled:true,...} ] ]                                                                     
        //   }                                                                                                                     
    
        ,{$project: {
            "skills": {$zip: {inputs: [ "$skills", "$zz" ]}}
        }}
    
        // Now we have a single array field with a well-known structure that we can                                                
        // reformat to produce the output we seek:                                                                                 
        ,{$project: {
            "skills": {$map: {input: "$skills", in:
                              {
                                  "skill": {$arrayElemAt:["$$this",1]},
                                  "level": {$arrayElemAt:["$$this.level",0]}
                              }
                             }}
        }}
    ]);
    

    Here is a more defensive option. It works with dupes and will capture missing skills:

    c=db.foo.aggregate([
        // Optional match here:                                                      
        //{$match: {_id:0}},                                                         
    
        {$unwind: "$skills"}
    
        ,{$lookup: {
            from: "Xskills",
            localField: "skills.skill",
            foreignField: "_id",
            as: "zz"
        }}
    
    
        ,{$project: {
            skills: {
                skill: {$cond: [
                    {$eq:[0,{$size: "$zz"}]},  // IF $lookup does not provide a match
                    {"_id": "$skills.skill"},  // THEN just set the skill id
                    {$arrayElemAt:["$zz",0]}   // ELSE copy over the whole skill entry including _id
                ]},
    
                level: "$skills.level"
            }
        }}
    
        ,{$group: {_id: "$_id", skills: {$push: "$skills"}}}
    
    ]);