Title: Aggregator Query for Identifying Workflow Streams in Hierarchical Document
I'm trying to write an aggregation query in MongoDB to process a complex document representing a set of activities with parent-child relationships. The goal is to identify multiple workflows (streams) from this data.
Input Documents:
{"activties": [
{ "activityId": "A1", "children": [ { "activityId": "A2" }, { "activityId": "A7" } ] },
{ "activityId": "A2", "children": [ { "activityId": "A3" } ] },
{ "activityId": "A3", "children": [ { "activityId": "A4" } ] },
{ "activityId": "A4", "children": [ { "activityId": "A5" } ] },
{ "activityId": "A5", "children": [] },
{ "activityId": "A7", "children": [] }
]}
Aggregation query:
db.collection.aggregate([
{
$graphLookup: {
from: "collection",
startWith: "$activityId",
connectFromField: "children.activityId",
connectToField: "activityId",
as: "workflowstreams",
depthField: "depth"
}
},
{
$addFields: {
workflowstreams: {
$map: {
input: "$workflowstreams",
as: "ws",
in: {
activityId: "$$ws.activityId",
depth: "$$ws.depth",
children: "$$ws.children"
}
}
}
}
},
{
$addFields: {
splittedStreams: {
$reduce: {
input: "$workflowstreams",
initialValue: { streams: [], currentStream: [] },
in: {
$cond: {
if: { $eq: ["$$this.children", []] },
then: {
streams: { $concatArrays: ["$$value.streams", [["$$value.currentStream", "$$this.activityId"]] ] },
currentStream: []
},
else: {
streams: "$$value.streams",
currentStream: { $concatArrays: ["$$value.currentStream", ["$$this.activityId"]] }
}
}
}
}
}
}
},
{
$project: {
workflowstreams: {
$concatArrays: [
"$splittedStreams.streams",
{
$cond: {
if: { $gt: [{ $size: "$splittedStreams.currentStream" }, 0] },
then: ["$splittedStreams.currentStream"],
else: []
}
}
]
}
}
},
{
$addFields: {
workflowstreams: {
$map: {
input: "$workflowstreams",
as: "ws",
in: {
$filter: {
input: "$$ws",
as: "activity",
cond: { $ne: ["$$activity", null] }
}
}
}
}
}
},
{
$addFields: {
workflowstreams: {
$map: {
input: "$workflowstreams",
as: "ws",
in: {
$filter: {
input: "$$ws",
as: "activity",
cond: { $ne: ["$$activity", []] }
}
}
}
}
}
},
{
$group: {
_id: null,
workflowstreams: { $first: "$workflowstreams" }
}
},
{
$project: {
_id: 0,
workflowstreams: {
$map: {
input: "$workflowstreams",
as: "ws",
in: {
$map: {
input: "$$ws",
as: "activity",
in: "$$activity"
}
}
}
}
}
}
]);
Expected Response:
[
{ "workflowstreams": [ ["A1", "A2", "A3", "A4", "A5"], ["A1", "A7"] ] }
]
There are a couple of ambiguities here:
Nevertheless, for the current situation, my best guess is you can start from the child nodes, do a $graphLookup
to identify the "streams". Wrangle them and $group
them together.
db.collection.aggregate([
{
"$match": {
"children": []
}
},
{
"$graphLookup": {
"from": "collection",
"startWith": "$activityId",
"connectFromField": "activityId",
"connectToField": "children.activityId",
"as": "workflowStreams"
}
},
{
"$set": {
"workflowStreams": {
"$setUnion": [
[
"$activityId"
],
"$workflowStreams.activityId"
]
}
}
},
{
"$group": {
"_id": "",
"workflowStreams": {
"$push": "$workflowStreams"
}
}
}
])