Search code examples
mongodbmongodb-querynosql-aggregation

MongoDB Count() vs. Aggregation


I've used aggregation in mongo a lot, I know performance benefits on the grouped counts and etc. But, do mongo have any difference in performance on those two ways to count all documents in a collection?:

collection.aggregate([
  {
    $match: {}
  },{
    $group: {
      _id: null, 
      count: {$sum: 1}
    }
}]);

and

collection.find({}).count()

Update: Second case: Let's say we have this sample data:

{_id: 1, type: 'one', value: true}
{_id: 2, type: 'two', value: false}
{_id: 4, type: 'five', value: false}

With aggregate():

var _ids = ['id1', 'id2', 'id3'];
var counted = Collections.mail.aggregate([
  {
    '$match': {
      _id: {
        '$in': _ids
      },
      value: false
    }
  }, {
    '$group': {
      _id: "$type",
      count: {
        '$sum': 1
      }
    }
  }
]);

With count():

var counted = {};
var type = 'two';
for (i = 0, len = _ids.length; i < len; i++) {
  counted[_ids[i]] = Collections.mail.find({
    _id: _ids[i], value: false, type: type
  }).count();
}

Solution

  • .count() is by far faster. You can see the implementation by calling

    // Note the missing parentheses at the end
    db.collection.count
    

    which returns the length of the cursor. of the default query (if count() is called with no query document), which in turn is implemented as returning the length of the _id_ index, iirc.

    An aggregation, however, reads each and every document and processes it. This can only be halfway in the same order of magnitude with .count() when doing it over only some 100k of documents (give and take according to your RAM).

    Below function was applied to a collection with some 12M entries:

    function checkSpeed(col,iterations){
    
      // Get the collection
      var collectionUnderTest = db[col];
    
      // The collection we are writing our stats to
      var stats = db[col+'STATS']
    
      // remove old stats
      stats.remove({})
    
      // Prevent allocation in loop
      var start = new Date().getTime()
      var duration = new Date().getTime()
    
      print("Counting with count()")
      for (var i = 1; i <= iterations; i++){
        start = new Date().getTime();
        var result = collectionUnderTest.count()
        duration = new Date().getTime() - start
        stats.insert({"type":"count","pass":i,"duration":duration,"count":result})
      }
    
      print("Counting with aggregation")
      for(var j = 1; j <= iterations; j++){
        start = new Date().getTime()
        var doc = collectionUnderTest.aggregate([{ $group:{_id: null, count:{ $sum: 1 } } }])
        duration = new Date().getTime() - start
        stats.insert({"type":"aggregation", "pass":j, "duration": duration,"count":doc.count})
      }
    
      var averages = stats.aggregate([
       {$group:{_id:"$type","average":{"$avg":"$duration"}}} 
      ])
    
      return averages
    }
    

    And returned:

    { "_id" : "aggregation", "average" : 43828.8 }
    { "_id" : "count", "average" : 0.6 }
    

    The unit is milliseconds.

    hth