Search code examples
mongodbaggregatorgraphlookup

Unable to achieve required response with complex mongo DB document


Title: Aggregator Query for Identifying Workflow Streams in Hierarchical Document

I'm trying to write an aggregation query in MongoDB to process a complex document representing a set of activities with parent-child relationships. The goal is to identify multiple workflows (streams) from this data.

Input Documents:

{"activties": [
  { "activityId": "A1", "children": [ { "activityId": "A2" }, { "activityId": "A7" } ] },
  { "activityId": "A2", "children": [ { "activityId": "A3" } ] },
  { "activityId": "A3", "children": [ { "activityId": "A4" } ] },
  { "activityId": "A4", "children": [ { "activityId": "A5" } ] },
  { "activityId": "A5", "children": [] },
  { "activityId": "A7", "children": [] }
]}

Aggregation query:

db.collection.aggregate([
  {
    $graphLookup: {
      from: "collection",
      startWith: "$activityId",
      connectFromField: "children.activityId",
      connectToField: "activityId",
      as: "workflowstreams",
      depthField: "depth"
    }
  },
  {
    $addFields: {
      workflowstreams: {
        $map: {
          input: "$workflowstreams",
          as: "ws",
          in: {
            activityId: "$$ws.activityId",
            depth: "$$ws.depth",
            children: "$$ws.children"
          }
        }
      }
    }
  },
  {
    $addFields: {
      splittedStreams: {
        $reduce: {
          input: "$workflowstreams",
          initialValue: { streams: [], currentStream: [] },
          in: {
            $cond: {
              if: { $eq: ["$$this.children", []] },
              then: {
                streams: { $concatArrays: ["$$value.streams", [["$$value.currentStream", "$$this.activityId"]] ] },
                currentStream: []
              },
              else: {
                streams: "$$value.streams",
                currentStream: { $concatArrays: ["$$value.currentStream", ["$$this.activityId"]] }
              }
            }
          }
        }
      }
    }
  },
  {
    $project: {
      workflowstreams: {
        $concatArrays: [
          "$splittedStreams.streams",
          {
            $cond: {
              if: { $gt: [{ $size: "$splittedStreams.currentStream" }, 0] },
              then: ["$splittedStreams.currentStream"],
              else: []
            }
          }
        ]
      }
    }
  },
  {
    $addFields: {
      workflowstreams: {
        $map: {
          input: "$workflowstreams",
          as: "ws",
          in: {
            $filter: {
              input: "$$ws",
              as: "activity",
              cond: { $ne: ["$$activity", null] }
            }
          }
        }
      }
    }
  },
  {
    $addFields: {
      workflowstreams: {
        $map: {
          input: "$workflowstreams",
          as: "ws",
          in: {
            $filter: {
              input: "$$ws",
              as: "activity",
              cond: { $ne: ["$$activity", []] }
            }
          }
        }
      }
    }
  },
  {
    $group: {
      _id: null,
      workflowstreams: { $first: "$workflowstreams" }
    }
  },
  {
    $project: {
      _id: 0,
      workflowstreams: {
        $map: {
          input: "$workflowstreams",
          as: "ws",
          in: {
            $map: {
              input: "$$ws",
              as: "activity",
              in: "$$activity"
            }
          }
        }
      }
    }
  }
]);

Expected Response:

[
  { "workflowstreams": [ ["A1", "A2", "A3", "A4", "A5"], ["A1", "A7"] ] }
]

Solution

  • There are a couple of ambiguities here:

    1. Are you going to do unconditional grouping at the end? so every "streams" in the collection will be grouped in a single array?
    2. What if there are cycles inside the graph?

    Nevertheless, for the current situation, my best guess is you can start from the child nodes, do a $graphLookup to identify the "streams". Wrangle them and $group them together.

    db.collection.aggregate([
      {
        "$match": {
          "children": []
        }
      },
      {
        "$graphLookup": {
          "from": "collection",
          "startWith": "$activityId",
          "connectFromField": "activityId",
          "connectToField": "children.activityId",
          "as": "workflowStreams"
        }
      },
      {
        "$set": {
          "workflowStreams": {
            "$setUnion": [
              [
                "$activityId"
              ],
              "$workflowStreams.activityId"
            ]
          }
        }
      },
      {
        "$group": {
          "_id": "",
          "workflowStreams": {
            "$push": "$workflowStreams"
          }
        }
      }
    ])
    

    Mongo Playground