Search code examples
databasemongodbmongooseaggregation-frameworkdocument-database

How to un-nest and group collections in mongoDB


I'm don't understand how to unwind and then nested collections in mongoDB. basically I have two collections that are structured like this:

questions doc:

{
    "_id" : 1,
    "questions" : [
        {
            "_id" : 1,
            "body" : "What fabric is the top made of?",
            "date_written" : "2018-01-04",
            "asker_name" : "yankeelover",
            "asker_email" : "first.last@gmail.com",
            "reported" : 0,
            "helpful" : 2
        },
        {
            "_id" : 2,
            "body" : "HEY THIS IS A WEIRD QUESTION!!!!?",
            "date_written" : "2019-04-28",
            "asker_name" : "jbilas",
            "asker_email" : "first.last@gmail.com",
            "reported" : 1,
            "helpful" : 4
        },
        {
            "_id" : 4,
            "body" : "How long does it last?",
            "date_written" : "2019-07-06",
            "asker_name" : "funnygirl",
            "asker_email" : "first.last@gmail.com",
            "reported" : 0,
            "helpful" : 6
        },

answers doc:

{
    "_id" : 1,
    "answers" : [
        {
            "_id" : 8,
            "body" : "DONT BUY IT! It's bad for the environment",
            "date_written" : "2018-01-04",
            "answerer_name" : "metslover",
            "answerer_email" : "first.last@gmail.com",
            "reported" : 0,
            "helpful" : 8
        },
        {
            "_id" : 7,
            "body" : "Its the best! Seriously magic fabric",
            "date_written" : "2018-01-04",
            "answerer_name" : "metslover",
            "answerer_email" : "first.last@gmail.com",
            "reported" : 0,
            "helpful" : 7
        },
        {
            "_id" : 5,
            "body" : "Something pretty soft but I can't be sure",
            "date_written" : "2018-01-04",
            "answerer_name" : "metslover",
            "answerer_email" : "first.last@gmail.com",
            "reported" : 0,
            "helpful" : 5,
            "photos" : [
                {
                    "_id" : 1,
                    "url" : "https://images.unsplash.com/photo-1530519729491-aea5b51d1ee1?ixlib=rb-1.2.1&ixid=eyJhcHBfaWQiOjEyMDd9&auto=format&fit=crop&w=1651&q=80"
                },

The _id field in the answers doc matches the _id field of the questions for which they are the answers too.

the end goal is to have data that looks something like this:

{
    "_id": "17762",
    "questions": [
        {
            "question_id": 152829,
            "question_body": "Why Does it look like this?",
            "question_date": "2021-03-06T00:00:00.000Z",
            "asker_name": "garethTheGreato",
            "question_helpfulness": 60,
            "reported": false,
            "answers": {
                "1443770": {
                    "id": 1443770,
                    "body": "This question was really helpful! Thank you.",
                    "date": "2021-03-09T00:00:00.000Z",
                    "answerer_name": "SatisfiedCustomer",
                    "helpfulness": 3,
                    "photos": []
                },
                "1443807": {
                    "id": 1443807,
                    "body": "mimk",
                    "date": "2021-03-09T00:00:00.000Z",
                    "answerer_name": "jij",
                    "helpfulness": 3,
                    "photos": [
                        "blob:http://localhost:3000/8f6375b3-0795-4210-bef7-f112feed8244"
                    ]
                },
                "1443834": {
                    "id": 1443834,
                    "body": "10/10 would recomend.",
                    "date": "2021-03-09T00:00:00.000Z",
                    "answerer_name": "Krista",
                    "helpfulness": 2,
                    "photos": []
                },
                "1443845": {
                    "id": 1443845,
                    "body": "Thank you so much for playing my game!",
                    "date": "2021-03-10T00:00:00.000Z",
                    "answerer_name": "itsameemario",
                    "helpfulness": 1,
                    "photos": []
                },
                "1443880": {
                    "id": 1443880,
                    "body": "Tree",
                    "date": "2021-03-10T00:00:00.000Z",
                    "answerer_name": "Tree",
                    "helpfulness": 0,
                    "photos": [
                        "blob:http://localhost:3000/123051b6-4dfb-410a-a96f-d4a5128e3056"
                    ]
                }
            }
        },
        {
            "question_id": 152702,
            "question_body": "Please write your question here",
            "question_date": "2021-03-05T00:00:00.000Z",
            "asker_name": "Your nickname",
            "question_helpfulness": 32,
            "reported": false,
            "answers": {}
        },

The problem I'm having is that when I run the lookup I get an answers array that correlates to the questions collection but am not sure how to get each set of answers to their specific question given that the answers come back deeply nested.

Here is what I have so far: (the ignore the slice and sorts for now these are parameters I will need for later as another part of the project)

  db.prodquests.aggregate([
    { $match: { _id: 5 } },
    { $unwind: '$questions' },
    { $match: { 'questions.reported': { $lt: 1 } } },
    { $sort: { 'questions.helpful': -1 } },
    { $group: { _id: '$_id', questions: { $push: '$questions' } } },
    { $project: { _id: 1, questions: { $slice: ['$questions', 0, 1] } } },
    { $unwind: '$questions' },
    {
      $lookup: {
        from: 'groupansphotos',
        localField: 'questions._id',
        foreignField: '_id',
        as: 'answers',
      },
    },
  ])

The return from this statement is as follows:

{
    "_id" : 5,
    "questions" : {
        "_id" : 37,
        "body" : "Why is this product cheaper here than other sites?",
        "date_written" : "2018-10-18",
        "asker_name" : "willsmith",
        "asker_email" : "first.last@gmail.com",
        "reported" : 0,
        "helpful" : 4
    },
    "answers" : [
        {
            "_id" : 37,
            "answers" : [
                {
                    "_id" : 68,
                    "body" : "We are selling it here without any markup from the middleman!",
                    "date_written" : "2018-08-18",
                    "answerer_name" : "Seller",
                    "answerer_email" : "null",
                    "reported" : 0,
                    "helpful" : 4
                }
            ]
        }
    ]
}

Essentially I want to group just that answers array under it's corresponding questions for which the _id field matches.

Thank you in advance!


Solution

  • UPDATE based on comments:

    Updated query:

    db.questions.aggregate([
        { $match: { _id: 5 } },
        { $unwind: '$questions' },
        { $match: { 'questions.reported': { $lt: 1 } } },
        { $sort: { 'questions.helpful': -1 } },
        {
            $lookup: {
                from: "answers",
                let: { question_id: "$questions._id" },
                pipeline: [
                    {
                        $match: {
                            $expr: { $eq: ["$_id", "$$question_id"] }
                        }
                    },
                    { $unwind: "$answers" },
                    {
                        $project: {
                            _id: 0,
                            k: { $toString: "$answers._id" },
                            v: "$$ROOT.answers"
                        }
                    }
                ],
                as: "answers"
            }
        },
        {
            $group: {
                _id: "$_id",
                questions: {
                    $push: {
                        question_id: "$questions._id",
                        question_body: "$questions.body",
                        question_date: "$questions.date_written",
                        asker_name: "$questions.asker_name",
                        question_helpfulness: "$questions.helpful",
                        reported: "$questions.reported",
                        answers: { $arrayToObject: "$answers" }
                    }
                }
            }
        }
    ]);
    

    Old query:

    Note: Plz fix the collection name and/or field names. Try this query:

    db.questions.aggregate([
        { $match: { _id: 5 } },
        { $unwind: '$questions' },
        { $match: { 'questions.reported': { $lt: 1 } } },
        { $sort: { 'questions.helpful': -1 } },
        {
            $lookup: {
                from: "answers",
                let: { question_id: "$questions._id" },
                pipeline: [
                    {
                        $match: {
                            $expr: { $eq: ["$_id", "$$question_id"] }
                        }
                    },
                    { $unwind: "$answers" },
                    {
                        $project: {
                            _id: 0,
                            k: { $toString: "$answers._id" },
                            v: "$$ROOT.answers"
                        }
                    }
                ],
                as: "answers"
            }
        },
        {
            $match: {
                $expr: {
                    $gt: [{ $size: "$answers" }, 0]
                }
            }
        },
        {
            $group: {
                _id: "$_id",
                questions: {
                    $push: {
                        question_id: "$questions._id",
                        question_body: "$questions.body",
                        question_date: "$questions.date_written",
                        asker_name: "$questions.asker_name",
                        question_helpfulness: "$questions.helpful",
                        reported: "$questions.reported",
                        answers: { $arrayToObject: "$answers" }
                    }
                }
            }
        }
    ]);
    

    Output:

    {
        "_id" : 5,
        "questions" : [
            {
                "question_id" : 2,
                "question_body" : "HEY THIS IS A WEIRD QUESTION!!!!?",
                "question_date" : "2019-04-28",
                "asker_name" : "jbilas",
                "question_helpfulness" : 4,
                "reported" : 0,
                "answers" : {
                    "14" : {
                        "_id" : 14,
                        "body" : "DONT BUY IT! It's bad for the environment",
                        "date_written" : "2018-01-04",
                        "answerer_name" : "metslover",
                        "answerer_email" : "first.last@gmail.com",
                        "reported" : 0,
                        "helpful" : 8
                    },
                    "15" : {
                        "_id" : 15,
                        "body" : "Its the best! Seriously magic fabric",
                        "date_written" : "2018-01-04",
                        "answerer_name" : "metslover",
                        "answerer_email" : "first.last@gmail.com",
                        "reported" : 0,
                        "helpful" : 7
                    },
                    "16" : {
                        "_id" : 16,
                        "body" : "Something pretty soft but I can't be sure",
                        "date_written" : "2018-01-04",
                        "answerer_name" : "metslover",
                        "answerer_email" : "first.last@gmail.com",
                        "reported" : 0,
                        "helpful" : 5
                    }
                }
            },
            {
                "question_id" : 1,
                "question_body" : "What fabric is the top made of?",
                "question_date" : "2018-01-04",
                "asker_name" : "yankeelover",
                "question_helpfulness" : 2,
                "reported" : 0,
                "answers" : {
                    "11" : {
                        "_id" : 11,
                        "body" : "DONT BUY IT! It's bad for the environment",
                        "date_written" : "2018-01-04",
                        "answerer_name" : "metslover",
                        "answerer_email" : "first.last@gmail.com",
                        "reported" : 0,
                        "helpful" : 8
                    },
                    "12" : {
                        "_id" : 12,
                        "body" : "Its the best! Seriously magic fabric",
                        "date_written" : "2018-01-04",
                        "answerer_name" : "metslover",
                        "answerer_email" : "first.last@gmail.com",
                        "reported" : 0,
                        "helpful" : 7
                    },
                    "13" : {
                        "_id" : 13,
                        "body" : "Something pretty soft but I can't be sure",
                        "date_written" : "2018-01-04",
                        "answerer_name" : "metslover",
                        "answerer_email" : "first.last@gmail.com",
                        "reported" : 0,
                        "helpful" : 5
                    }
                }
            }
        ]
    }
    

    Test data:

    questions collection

    {
        "_id" : 5,
        "questions" : [
            {
                "_id" : 1,
                "body" : "What fabric is the top made of?",
                "date_written" : "2018-01-04",
                "asker_name" : "yankeelover",
                "asker_email" : "first.last@gmail.com",
                "reported" : 0,
                "helpful" : 2
            },
            {
                "_id" : 2,
                "body" : "HEY THIS IS A WEIRD QUESTION!!!!?",
                "date_written" : "2019-04-28",
                "asker_name" : "jbilas",
                "asker_email" : "first.last@gmail.com",
                "reported" : 0,
                "helpful" : 4
            },
            {
                "_id" : 4,
                "body" : "How long does it last?",
                "date_written" : "2019-07-06",
                "asker_name" : "funnygirl",
                "asker_email" : "first.last@gmail.com",
                "reported" : 0,
                "helpful" : 6
            }
        ]
    }
    

    answers collection:

    /* 1 */
    {
        "_id" : 1,
        "answers" : [
            {
                "_id" : 11,
                "body" : "DONT BUY IT! It's bad for the environment",
                "date_written" : "2018-01-04",
                "answerer_name" : "metslover",
                "answerer_email" : "first.last@gmail.com",
                "reported" : 0,
                "helpful" : 8
            },
            {
                "_id" : 12,
                "body" : "Its the best! Seriously magic fabric",
                "date_written" : "2018-01-04",
                "answerer_name" : "metslover",
                "answerer_email" : "first.last@gmail.com",
                "reported" : 0,
                "helpful" : 7
            },
            {
                "_id" : 13,
                "body" : "Something pretty soft but I can't be sure",
                "date_written" : "2018-01-04",
                "answerer_name" : "metslover",
                "answerer_email" : "first.last@gmail.com",
                "reported" : 0,
                "helpful" : 5
            }
        ]
    },
    
    /* 2 */
    {
        "_id" : 2,
        "answers" : [
            {
                "_id" : 14,
                "body" : "DONT BUY IT! It's bad for the environment",
                "date_written" : "2018-01-04",
                "answerer_name" : "metslover",
                "answerer_email" : "first.last@gmail.com",
                "reported" : 0,
                "helpful" : 8
            },
            {
                "_id" : 15,
                "body" : "Its the best! Seriously magic fabric",
                "date_written" : "2018-01-04",
                "answerer_name" : "metslover",
                "answerer_email" : "first.last@gmail.com",
                "reported" : 0,
                "helpful" : 7
            },
            {
                "_id" : 16,
                "body" : "Something pretty soft but I can't be sure",
                "date_written" : "2018-01-04",
                "answerer_name" : "metslover",
                "answerer_email" : "first.last@gmail.com",
                "reported" : 0,
                "helpful" : 5
            }
        ]
    }