Search code examples
mongodbnode.jsinsertdata-integrity

Mongodb inserts not completing successfully (using node.js)


I've got a node.js script that loads an XML file. It loops through each element in the Mongo array and says that they're all getting inserted correctly, but when the script has completed a check of db.collection.count(); tells me that far fewer records have been inserted into the database than the number expected.

How can I make mongo and node.js play nicely with inserts?

GrabRss = function() {
var http    = require('http');
var sys     = require('sys');
var xml2js  = require('xml2js');
var fs      = require('fs');

var Db      = require('../lib/mongodb').Db,
    Conn    = require('../lib/mongodb').Connection,
    Server  = require('../lib/mongodb').Server,
    // BSON = require('../lib/mongodb').BSONPure;
    BSON    = require('../lib/mongodb').BSONNative;

var data;
var checked = 0;
var len = 0;

GotResponse = function(res) {
    var ResponseBody = "";
    res.on('data', DoChunk);
    res.on('end', EndResponse);

    function DoChunk(chunk){
        ResponseBody += chunk;
    }
    function EndResponse() {
        //console.log(ResponseBody);
        var parser = new xml2js.Parser();
        parser.addListener('end', GotRSSObject);
        parser.parseString(ResponseBody);
    }
}

GotError = function(e) {
    console.log("Got error: " + e.message);
}

GotRSSObject = function(r){
    items = r.item;
    //console.log(sys.inspect(r));

    var db = new Db('myrssdb', new Server('localhost', 27017, {}), {native_parser:false});
    db.open(function(err, db){
         db.collection('items', function(err, col) {
            len = movies.length;
            for (i in items) {
                SaveItem(items[i], col);
            }
         });
    });
}

SaveItem = function(m, c) {
            /*  REPLACE FROM HERE IN ANSWER  */
    c.find({'id': m.id}, function(err, cursor){
        cursor.nextObject(function(err, doc) { 
            if (doc == null) {
                c.insert(m, function(err, docs) {
                    docs.forEach(function(doc) {
                        console.log('Saved: '+doc.id+' '+doc.keywords);
                    });
                });
            } else {
                console.log('Skipped: '+m.id);
            }
            if (++checked >= len) {
                process.exit(0);
            } 
        });
    });
            /*  REPLACE TO HERE IN ANSWER  */
}
//http.get(options, GotResponse).on('error', GotError);
var x2js = new xml2js.Parser();

fs.readFile('/home/ubuntu/myrss.rss', function(err, data) {
    x2js.parseString(data);
});

x2js.addListener('end', GotRSSObject);

}
GrabRss();

As requested, the code is above. The file is read locally (though is used to be an HTTP request, but it's a 25 meg file now, lots of RSS records)

I just ran the file with some ~10k records in it and a count of the items in the mongoDB after the script has run is about 800 items.

As per the answer I replaced the insert code:

with:

c.update({'id': m.id}, {$set: m}, {upsert: true, safe: true}, function(err){
    if (err) console.warn(err.message);
        else console.log(m.keywords);
        if (++checked >= len) {
            console.log(len);
            //process.exit(0);

            process.exit(0);
        } 
});

Solution

  • By default, MongoDB writes do not check for an error.

    You need to set safe:true in the options to your insert, as explained in the documentation for node-mongodb-native:

    var collection = new mongodb.Collection(client, 'test_collection');
    collection.insert({hello: 'world'}, {safe:true},
                      function(err, objects) {
      if (err) console.warn(err.message);
      if (err && err.message.indexOf('E11000 ') !== -1) {
        // this _id was already inserted in the database
      }
    });
    

    Otherwise your callback will not be invoked for errors and your client won't know about them.

    You probably also want to look at upserts and updates, as it is incredibly inefficient to do find & insert if null in a loop.

    Instead, upsert will update if the matching document exists, otherwise it will insert a new one. An explanation on how to do this in Node is in the documentaiton for the driver.