Search code examples
mongodbaggregation-framework

Creating measures in a mongodb aggregation pipeline


I have a report that has been developed in PowerBI. It runs over a collection of jobs, and for a given month and year counts the number of jobs that were created, due or completed in that month using measures.

I am attempting to reproduce this report using a mongoDB aggregation pipeline. At first, I thought I could just use the $group stage to do this, but quickly realised that grouping by a specific date would exclude jobs.

Some sample documents are below (most fields excluded as they are not relevant):

{
"_id": <UUID>,
"createdOn": ISODate("2022-07-01T00:00"),
"dueOn": ISODate("2022-08-01T00:00"),
"completedOn": ISODate("2022-07-29T00:00")
},

{
"_id": <UUID>,
"createdOn": ISODate("2022-06-01T00:00"),
"dueOn": ISODate("2022-08-01T00:00"),
"completedOn": ISODate("2022-07-24T00:00")
}

For example, if I group by created date, the record for July 2022 would show 1 created job and only 1 completed job, but it should show 2.

How can I go about recreating this report? One idea was that I needed to determine the minimum and maximum of all the possible dates across those 3 date fields in my collection, but I don't know where to go from there


Solution

  • I ended up solving this by using a facet. I followed this process:

    1. Each facet field grouped by a different date field from the source document, and then aggregated the relevant field (e.g. counts, or sums as required). I ensured each of these fields in the facet had a unique name.
    2. I then did a project stage where I took each of the facet stage fields (arrays), and concat them into a single array
    3. I unwound the array, and then replaced the root to make it simpler to work with
    4. I then grouped again by the _id field which was set to the relevant date during the facet field, and then grabbed the relevant fields.

    The relevant parts of the pipeline are below:

    db.getCollection("jobs").aggregate(
    
        // Pipeline
        [
            
            // Stage 3
            {
                $facet: {
                    //Facet 1, group by created date, count number of jobs created
                    //facet 2, group by completed date, count number of jobs completed
                    //facet 3, group by due date, count number of jobs due
                    "created" : [
                        {
                            $addFields : {
                                "monthStarting" : {
                                    "$dateFromString" : {
                                        "dateString" : {
                                            "$dateToString" : {
                                                "date" : {
                                                    "$dateTrunc" : {
                                                        "date" : "$createdAt",
                                                        "unit" : "month",
                                                        "binSize" : 1.0,
                                                        "timezone" : "$timezone",
                                                        "startOfWeek" : "mon"
                                                    }
                                                },
                                                "timezone" : "$timezone"
                                            }
                                        }
                                    }
                                },
                                "yearStarting" : {
                                    "$dateFromString" : {
                                        "dateString" : {
                                            "$dateToString" : {
                                                "date" : {
                                                    "$dateTrunc" : {
                                                        "date" : "$createdAt",
                                                        "unit" : "year",
                                                        "binSize" : 1.0,
                                                        "timezone" : "$timezone"
                                                    }
                                                },
                                                "timezone" : "$timezone"
                                            }
                                        }
                                    }
                                }
                            }
                        },
                        {
                            $group : {
                                "_id" : {
                                    "year" : "$yearStarting",
                                    "month" : "$monthStarting"
                                },
                                "monthStarting" : {
                                    "$first" : "$monthStarting"
                                },
                                "yearStarting" : {
                                    "$first" : "$yearStarting"
                                },
                                "createdCount": {$sum: 1}
                            }
                        }
                    ],
                    "completed" : [
                         
                         {
                            $addFields : {
                                "monthStarting" : {
                                    "$dateFromString" : {
                                        "dateString" : {
                                            "$dateToString" : {
                                                "date" : {
                                                    "$dateTrunc" : {
                                                        "date" : "$completedDate",
                                                        "unit" : "month",
                                                        "binSize" : 1.0,
                                                        "timezone" : "$timezone",
                                                        "startOfWeek" : "mon"
                                                    }
                                                },
                                                "timezone" : "$timezone"
                                            }
                                        }
                                    }
                                },
                                "yearStarting" : {
                                    "$dateFromString" : {
                                        "dateString" : {
                                            "$dateToString" : {
                                                "date" : {
                                                    "$dateTrunc" : {
                                                        "date" : "$completedDate",
                                                        "unit" : "year",
                                                        "binSize" : 1.0,
                                                        "timezone" : "$timezone"
                                                    }
                                                },
                                                "timezone" : "$timezone"
                                            }
                                        }
                                    }
                                }
                            }
                        },
                        {
                            $group : {
                                "_id" : {
                                    "year" : "$yearStarting",
                                    "month" : "$monthStarting"
                                },
                                "monthStarting" : {
                                    "$first" : "$monthStarting"
                                },
                                "yearStarting" : {
                                    "$first" : "$yearStarting"
                                },
                                "completedCount": {$sum: 1}
                            }
                        }
                    ],
                    "due": [
                    {
                             $match: {
                                 "dueDate": {$ne: null}
                             }
                         },
                         {
                            $addFields : {
                                "monthStarting" : {
                                    "$dateFromString" : {
                                        "dateString" : {
                                            "$dateToString" : {
                                                "date" : {
                                                    "$dateTrunc" : {
                                                        "date" : "$dueDate",
                                                        "unit" : "month",
                                                        "binSize" : 1.0,
                                                        "timezone" : "$timezone",
                                                        "startOfWeek" : "mon"
                                                    }
                                                },
                                                "timezone" : "$timezone"
                                            }
                                        }
                                    }
                                },
                                "yearStarting" : {
                                    "$dateFromString" : {
                                        "dateString" : {
                                            "$dateToString" : {
                                                "date" : {
                                                    "$dateTrunc" : {
                                                        "date" : "$dueDate",
                                                        "unit" : "year",
                                                        "binSize" : 1.0,
                                                        "timezone" : "$timezone"
                                                    }
                                                },
                                                "timezone" : "$timezone"
                                            }
                                        }
                                    }
                                }
                            }
                        },
                        {
                            $group : {
                                "_id" : {
                                    "year" : "$yearStarting",
                                    "month" : "$monthStarting"
                                },
                                "monthStarting" : {
                                    "$first" : "$monthStarting"
                                },
                                "yearStarting" : {
                                    "$first" : "$yearStarting"
                                },
                                "dueCount": {$sum: 1},
                                "salesRevenue": {$sum: "$totalSellPrice"},
                                "costGenerated": {$sum: "$totalBuyPrice"},
                                "profit": {$sum: "$profit"},
                                "avgValue": {$avg: "$totalSellPrice"},
                                "finalisedRevenue": {$sum: {
                                        $cond: {
                                            "if": {$in: ["$status",["Finalised","Closed"]]},
                                            "then": "$totalSellPrice",
                                            "else": 0
                                        }
                                
                                }}
                            }
                        }
                    
                    ]
                }
            },
    
            // Stage 4
            {
                $project: {
                    "docs": {$concatArrays: ["$created","$completed","$due"]}
                }
            },
    
            // Stage 5
            {
                $unwind: {
                    path: "$docs",
                }
            },
    
            // Stage 6
            {
                $replaceRoot: {
                    // specifications
                       "newRoot": "$docs"
                }
            },
    
            // Stage 7
            {
                $group: {
                    _id: "$_id",
                     "monthStarting" : {
                        "$first" : "$monthStarting"
                    },
                    "yearStarting" : {
                        "$first" : "$yearStarting"
                    },
                     "monthStarting" : {
                        "$first" : "$monthStarting"
                    },
                     "createdCountSum" : {
                        "$sum" : "$createdCount"
                    },
                    "completedCountSum" : {
                        "$sum" : "$completedCount"
                    },
                     "dueCountSum" : {
                        "$sum" : "$dueCount"
                    },
                     "salesRevenue" : {
                        "$sum" : "$salesRevenue"
                    },
                     "costGenerated" : {
                        "$sum" : "$costGenerated"
                    },
                     "profit" : {
                        "$sum" : "$profit"
                    },
                     "finalisedRevenue" : {
                        "$sum" : "$finalisedRevenue"
                    },
                     "avgJobValue": {
                        $sum: "$avgValue"
                    }
                }
            },
        ],
    
    );