Search code examples
arraysmongodbmongodb-queryaggregation-frameworkaggregate

I need to insert the documents between the dates with help of past documents in mongodb collections


I have some documents from 13-july-2013 to 18-july-2013 in mongodb collections. but few days data was missed from collection. I need to insert that data with help of last five documents.

My data look like

{ 
  _id: 1,
  "timestamp": "2023-07-13T10:00:00Z"
  "code": "bc",
  "energy": 2333
},
{ 
  _id: 2,
  "timestamp": "2023-07-13T10:10:00Z"
  "code": "abc",
  "energy": 2340
},
{ 
  _id: 3,
  "timestamp": "2023-07-14T10:30:00Z"
  "code": "abc",
  "energy": 2349
},
{ 
  _id: 4,
  "timestamp": "2023-07-14T10:40:00Z"
  "code": "ah",
  "energy": 2355
},
{ 
  _id: 5,
  "timestamp": "2023-07-14T10:50:00Z"
  "code": "kk",
  "energy": 2360
},
{ 
  _id: 6,
  "timestamp": "2023-07-14T11:00:00Z"
  "code": "la",
  "energy": 2370
},
{ 
  _id: 7,
  "timestamp": "2023-07-14T10:00:00Z"
  "code": "as",
  "energy": 3455
},
{ 
  _id: 8,
  "timestamp": "2023-07-17T10:10:00Z"
  "code": "uj",
  "energy": 4567659
},
{ 
  _id: 9,
  "timestamp": "2023-07-17T10:30:00Z"
  "code": "la",
  "energy": 564546
},
{ 
  _id: 10,
  "timestamp": "2023-07-17T10:40:00Z"
  "code": "ws",
  "energy": 5654348
},
{ 
  _id: 11,
  "timestamp": "2023-07-18T10:50:00Z"
  "code": "lk",
  "energy": 6765436
},
{ 
  _id: 12,
  "timestamp": "2023-07-18T11:00:00Z"
  "code": "pl",
  "energy": 7654223
}

Here 15th and 16th data missed so I need insert these documents with update the energy field value, take the average of last 5 documents energy field value then mention that value in 15th date documents, like same as for next documents also, please help on this.

Output:

{ 
      _id: 1,
      "timestamp": "2023-07-13T10:00:00Z"
      "code": "bc",
      "energy": 2333
    },
    { 
      _id: 2,
      "timestamp": "2023-07-13T10:10:00Z"
      "code": "abc",
      "energy": 2340
    },
    { 
      _id: 3,
      "timestamp": "2023-07-14T10:30:00Z"
      "code": "abc",
      "energy": 2349
    },
    { 
      _id: 4,
      "timestamp": "2023-07-14T10:40:00Z"
      "code": "ah",
      "energy": 2355
    },
    { 
      _id: 5,
      "timestamp": "2023-07-14T10:50:00Z"
      "code": "kk",
      "energy": 2360
    },
    { 
      _id: 6,
      "timestamp": "2023-07-14T11:00:00Z"
      "code": "la",
      "energy": 2370
    },
    { 
      _id: 7,
      "timestamp": "2023-07-14T10:00:00Z"
      "code": "as",
      "energy": 2380
    },
     { 
      _id: 8,
      "timestamp": "2023-07-15T10:00:00Z"
      "code": "as",
      "energy": 9910
    },
     { 
      _id: 9,
      "timestamp": "2023-07-15T10:15:00Z"
      "code": "as",
      "energy": 11447
    },
     { 
      _id: 10,
      "timestamp": "2023-07-15T10:30:00Z"
      "code": "as",
      "energy": 19309.4
    },
     { 
      _id: 11,
      "timestamp": "2023-07-15T10:45:00Z"
      "code": "as",
      "energy": 29968
    },
     { 
      _id: 12,
      "timestamp": "2023-07-15T11:00:00Z"
      "code": "as",
      "energy": 49040
    },
     { 
      _id: 13,
      "timestamp": "2023-07-16T10:00:00Z"
      "code": "as",
      "energy": 80442
    },
     { 
      _id: 14,
      "timestamp": "2023-07-16T10:15:00Z"
      "code": "as",
      "energy": 1025852
    },
    { 
      _id: 16,
      "timestamp": "2023-07-17T10:10:00Z"
      "code": "uj",
      "energy": 4567659
    },
    { 
      _id: 17,
      "timestamp": "2023-07-17T10:30:00Z"
      "code": "la",
      "energy": 564546
    },
    { 
      _id: 18,
      "timestamp": "2023-07-17T10:40:00Z"
      "code": "ws",
      "energy": 5654348
    },
    { 
      _id: 19,
      "timestamp": "2023-07-18T10:50:00Z"
      "code": "lk",
      "energy": 6765436
    },
    { 
      _id: 20,
      "timestamp": "2023-07-18T11:00:00Z"
      "code": "pl",
      "energy": 7654223
    }

Solution

  • You can try this one:

    db.collection.aggregate([
       {
          $set: {
             day: {
                $dateTrunc: {
                   date: "$timestamp",
                   unit: "day",
                   timezone: "Europe/Zurich"
                }
             }
          }
       },
       {
          $setWindowFields: {
             sortBy: { timestamp: 1 },
             output: {
                average: {
                   $avg: "$energy",
                   window: { documents: [-5, "current"] }
                }
             }
          }
       },
       {
          $densify: {
             field: "day",
             range: {
                step: 1,
                unit: "day",
                bounds: "full"
             }
          }
       },
       {
          $fill: {
             sortBy: { day: 1, timestamp: 1 },
             output: {
                average: { method: "locf" }
             }
          }
       },
       {
          $set: {
             timestamp: { $ifNull: ["$timestamp", "$day"] },
             average: "$$REMOVE",
             energy: { $ifNull: ["$energy", "$average"] },
             day: "$$REMOVE",
          }
       }
    ])
    

    Mongo Playground

    Note, $fill also provides a linear regression function which may fit even better.

    Update

    If you need data every 15 Minutes, then it is even simpler:

    db.collection.aggregate([
       {
          $setWindowFields: {
             sortBy: { timestamp: 1 },
             output: {
                average: {
                   $avg: "$energy",
                   window: { documents: [-5, "current"] }
                }
             }
          }
       },
       {
          $densify: {
             field: "timestamp",
             range: {
                step: 15,
                unit: "minute",
                bounds: "full"
             }
          }
       },
       {
          $fill: {
             sortBy: { timestamp: 1 },
             output: {
                average: { method: "locf" }
             }
          }
       },
       {
          $set: {
             average: "$$REMOVE",
             energy: { $ifNull: ["$energy", "$average"] }
          }
       }
    ])