Search code examples
Tags: node.js, mongodb, node-mongodb-native

MongoDB nodejs driver not returning more than 100000 rows


This is an example to replicate my issue:

I populate my collection with 1million documents like this:

// Seed the collection with 1,000,000 documents; every third document
// additionally carries a signUpDate field.
// Fixes: the loop counter was an implicit global (`i=1` with no declaration),
// and the single-statement if/else branches lacked braces.
for (var i = 1; i <= 1000000; i++) {
    if (i % 3 === 0) {
        db.numbers.insert({_id: i, stuff: "Some data", signUpDate: new Date()});
    } else {
        db.numbers.insert({_id: i, stuff: "Some data"});
    }
}

So, every 3rd document has a signUpDate

I create the following index:

// ensureIndex is deprecated (removed in modern shells); createIndex is the
// supported equivalent and behaves identically for this call.
db.numbers.createIndex({"signUpDate" : 1});

Then, I have the following very small app using nodejs:

var Db = require('mongodb').Db
, Connection = require('mongodb').Connection
, Server = require('mongodb').Server
, format = require('util').format;

var host = 'localhost';
var port = Connection.DEFAULT_PORT;

console.log("Connecting to " + host + ":" + port);

// Connect and count the documents that actually have a signUpDate.
Db.connect(format("mongodb://%s:%s/test?w=1", host, port), function(err, db) {
        // On connection failure `db` is undefined — never ignore this error.
        if (err) throw err;

        var collection = db.collection('numbers');

        // The seeded documents have a `signUpDate` field, not `signedUp`,
        // so the original filter {'signedUp': true} matched nothing.
        // $exists selects the ~333,000 documents that carry signUpDate.
        collection.find({'signUpDate': {$exists: true}}, {'_id':1}).limit(100000).toArray(function(err, docs){
                if (err) throw err;
                console.log(docs.length);
                // Close the connection so the process can exit cleanly.
                db.close();
        });
});

This works fine.

However, If I remove the .limit(100000), the server sits there and never responds.

In a nutshell, all I'm trying to do is return a list of _ids where signUpDate is not null (there should be around 333,000)

I'm pretty sure the issue is that toArray buffers the entire result set in memory before invoking the callback, but I'm not sure how I can work around this?


Solution

  • You shouldn't call toArray on a large result set like this. Instead, either:

    Iterate over the results using each:

    // Walk the cursor one document at a time; the driver invokes the
    // callback with doc === null once the cursor is exhausted.
    var cursor = collection.find({'signedUp': true}, {'_id':1});
    cursor.each(function(err, doc){
        if (!doc) {
            console.log('All done!');
        } else {
            console.log(doc);
        }
    });
    

    or stream the results:

    // Consume the cursor as a Node stream: each matching document is
    // handled as it arrives instead of being buffered into one array.
    var stream = collection.find({'signedUp': true}, {'_id':1}).stream();
    stream
        .on('data', function(doc) {
            console.log(doc);
        })
        .on('close', function() {
            console.log('All done!');
        });