Search code examples
mongodbdata-sciencebusiness-intelligence

Complex time series query time difference per action type


I have this data structure of documents in MongoDB and it is intended for complex data analysis from any point of view in relation to time series of different actions (flat data log). I found it difficult to extract the time taken between specific type of changes per document using mongo queries and then applying the $graphLookup function (shown below). I'm a beginner in MongoDB and I need help with the query to fetch the required data.

the data structure of a single document (example):

{  
    "_id":NumberInt(1),
    "Creation":     ISODate("2018-11-19T06:30:42Z"),
    "Creator":      NumberInt(1),
    "Replies":      NumberInt(10),
    //... other aggregated properties 
    "CurrentProperties":{  // a copy of the last update signifying the current state
        "StatusId":         NumberInt(8),
        "PriorityId":       NumberInt(6),
        "DepartmentId":     NumberInt(5),
        "TypeId":           NumberInt(4),
        "CategoryId":       NumberInt(2),
        "SubcategoryId":    NumberInt(333),
        "ChangeTime":       ISODate("2018-11-19T10:17:20Z"),
        "TimeDelta":        NumberLong(3600000), //timespan from last change in MS 
        "ChangeType":       NumberInt(4),
        "UserId":           NumberInt(1)
    },
    "ChangeHistory":[  // time series changes
        {  
            "StatusId":         NumberInt(8),
            "PriorityId":       NumberInt(6),
            "DepartmentId":     NumberInt(1),
            "TypeId":           NumberInt(4),
            "CategoryId":       NumberInt(2),
            "SubcategoryId":    NumberInt(333),
            "ChangeTime":       ISODate("2018-11-19T10:14:20Z"),
            "TimeDelta":        NumberLong(0), //timespan from last change in MS 
            "ChangeType":       NumberInt(0), // the changed property identifier (0= creation)
            "UserId":           NumberInt(1)
        },
        {  
            "StatusId":         NumberInt(8),
            "PriorityId":       NumberInt(6),
            "DepartmentId":     NumberInt(2),
            "TypeId":           NumberInt(4),
            "CategoryId":       NumberInt(2),
            "SubcategoryId":    NumberInt(333),
            "ChangeTime":       ISODate("2018-11-19T10:15:50Z"),
            "TimeDelta":        NumberLong(90000), //timespan from last change in MS 
            "ChangeType":       NumberInt(4), // the changed property identifier (4= department)
            "UserId":           NumberInt(1)
        },
        {  
            "StatusId":         NumberInt(2),
            "PriorityId":       NumberInt(6),
            "DepartmentId":     NumberInt(2),
            "TypeId":           NumberInt(4),
            "CategoryId":       NumberInt(2),
            "SubcategoryId":    NumberInt(333),
            "ChangeTime":       ISODate("2018-11-19T10:16:20Z"),
            "TimeDelta":        NumberLong(30000), //timespan from last change in MS 
            "ChangeType":       NumberInt(2), // the changed property identifier (2= status)
            "UserId":           NumberInt(1)
        },
        {  
            "StatusId":         NumberInt(2),
            "PriorityId":       NumberInt(6),
            "DepartmentId":     NumberInt(5),
            "TypeId":           NumberInt(4),
            "CategoryId":       NumberInt(2),
            "SubcategoryId":    NumberInt(333),
            "ChangeTime":       ISODate("2018-11-19T10:17:20Z"),
            "TimeDelta":        NumberLong(60000), //timespan from last change in MS 
            "ChangeType":       NumberInt(4), // the changed property identifier (4= department)
            "UserId":           NumberInt(1)
        }
    ]
}

The expected result for department changes in time:

[{
    RecordID:       1,
    Department:     1,
    ChangeTime:     ISODate("2018-11-19T10:15:50Z"),
    TimeSpent:      90000
},
{
    RecordID:       1,
    Department:     2,
    ChangeTime:     ISODate("2018-11-19T10:17:20Z")
    TimeSpent:      90000
},
{
    RecordID:       1,
    Department:     5,
    ChangeTime:     ISODate("2018-11-21T09:47:47Z") // Current Time
    TimeSpent:      171027000 //difference between now and last change in departments
}]

and for status:

[{
    RecordID:       1,
    Status:         8,
    ChangeTime:     ISODate("2018-11-19T10:16:20Z"),
    TimeDelta:      120000
},
{
    RecordID:       1,
    Status:         2,
    ChangeTime:     ISODate("2018-11-21T09:47:47Z"), // Current Time
    TimeDelta:      171087000 //difference between now and last change in status
}]

What I tried so far

The best result I got so far was using the following aggregation to create a view and then apply a $GraphLookup function on the view:

db.test.aggregate([
    {$project: {
      _id:0,
      RecordID: "$_id",
      history: {
        $filter: {
          input: "$ChangeHistory",
          as: "changeHistory",
          cond: {$or:[
            {$eq:["$$changeHistory.ChangeType",0]},
            {$eq:["$$changeHistory.ChangeType",4]}
            ]}

                }
      }
    }}, 
    {$unwind: {
      path: "$history",
      includeArrayIndex:"order"
    }}, {$project: {
      _id:"$RecordID",
      "RecordID": "$RecordID",
      "departmentID": "$history.DepartmentId",
      "actionOrder":"$order",
      "nextAction":{$add:["$order",1]},
      "time":"$history.ChangeTime"
    }}
])

then applied the following:

db.TestView.aggregate([{
        $graphLookup: {
            from: 'TestView',
            startWith: "$nextAction",
            connectFromField: 'nextAction',
            connectToField: 'actionOrder',
            as: 'pair',
        }
    }, {
        $unwind: {
            path: "$pair"
        }
    }, {
        $project: {
            _id: 0,
            RecordID: "$_id",
            Department: "$departmentID",
            ChangeTime: "$pair.time",
            TimeSpent: {
                $subtract: ["$pair.time", "$time"]
            }
        }
    }
])

the problem with this is that it mixes the action pairing across different documents, doesn't include the spent time till the current time and it has so many propagations on top of using a view in the middle.

The data structure can be modified a little if needed.


Solution

  • I actually took 2 days trying to figure out a solution for this before posting the question, and I solved it a few hours later.

    Just wanted to share my solution and if anyone could optimize it for performance or anything please feel free to post your answers too

    Solution

    it makes use of the $zip function in order to form pairs of actions after the filter is applied by passing the original array of events and another copy of the same array excluding the first element, so that the first element gets matched with the second and the second with the third and so on. I also added a default of the current time to calculate the last element's delta from the current time.

    db.test.aggregate([{
        $project: {
          RecordID: "$_id",
          history: {
            $filter: {
              input: "$ChangeHistory",
              as: "changeHistory",
              cond: {
                $or: [{
                    $eq: ["$$changeHistory.ChangeType", 0]
                  },
                  {
                    $eq: ["$$changeHistory.ChangeType", 2]
                  }
                ]
              }
    
            }
          }
        }
      },
      {
        $addFields: {
          pairs: {
            $zip: { // here is the trick
              inputs: ["$history", {
                $slice: ["$history", 1, {
                  $size: "$history"
                }]
              }],
              useLongestLength: true,
              defaults: [0, {
                ChangeTime: new Date()
              }]
            }
          }
        }
      },
      {
        $unwind: {
          path: "$pairs"
        }
      },
      {
        $project: {
          id: "$_id",
          old: {
            $arrayElemAt: ["$pairs", 0]
          },
          new: {
            $arrayElemAt: ["$pairs", 1]
          }
        }
      },
      {
        $project: {
          RecordID: "$id",
          Status: "$old.StatusId",
          TimeDeltaMS: {
            $subtract: ["$new.ChangeTime", "$old.ChangeTime"]
          },
          ChangeTime: "$new.ChangeTime"
        }
      },
    ])