Search code examples
node.jscsvgruntjscouchdbcouchdb-nano

read csv with headers then upload each row to couchdb using node/grunt


I would like to read a csv file and upload each row to a couchdb using a grunt task. At this point I am not yet doing any database validation such as checking if the record already exists but will have to do that at some point also.

Currently this is what I am doing and the problem is only the first 65 rows, of the first sub task named people is being uploaded to couchdb.

I know this has something to do with asynchronous execution but just can't work out how to do this

Gruntils.js

csv2couch: {
    people: {
        db: 'http://localhost:5984/db',
        collectionName: 'person',
        src:['./data/schema3/people.csv']
    },
    organisms: {
        db: '<%= qmconfig.COUCHDBURL %>',
        collectionName: 'organism',
        src:['./data/schema3/organisms.csv']
    }

}

csv2couch.js

'use strict';

var nanolib = require('nano'),
    csv = require('csv'),
    urls = require('url'),
    fs = require('fs');

module.exports = function(grunt) {

    grunt.registerMultiTask('csv2couch', 'Parse csv file and upload data to couchdb.', function() {

        var done, parts, dbname, _this, collectionName;
        _this = this;
        done = this.async();
        parts = urls.parse(this.data.db);
        dbname = parts.pathname.replace(/^\//, '');
        collectionName = this.data.collectionName;

        // Merge task-specific and/or target-specific options with these defaults.
        var options = this.options({});

        // couchdb connection
        try {
            var nano = nanolib(parts.protocol + '//' + parts.host);
        } catch (e) {
            grunt.warn(e);
            done(e, null);
        }

        // database connection
        var db = nano.use(dbname);

        // process each source csv file
        this.filesSrc.forEach(function(f) {

            console.log('source file:', f);

            csv()
                .from.path(f, {
                    columns:true,
                    delimeter:',',
                    quote:'"'
                })
                .on('record', function(row,index){
                  console.log('#'+index, row);
                  save(row, collectionName); 
                })
                .on('end', function(count){
                  console.log('Number of lines: '+count);
                  done();
                })
                .on('error', function(error){
                  console.log(error.message);
                  done(error);
                });
        });

        function save (data, collectionName) {

            // document ID is concatenation of collectionName and ID 
            var docID = collectionName[0]+'_'+data.ID;

            // add some additional data
            data.type = collectionName;

            // insert data into couchdb
            db.insert(data, docID, function(err, body, header) {
              if (err) {
                console.log('[db.insert] ', err.message);
                return;
              }
            });
        }

    });

};

Solution

  • You're right, the async code is incorrect. The CSV file is being read to the end before all your records are saved. You need to call done only when your last record has been saved.

    Your save method needs to take a callback

    var rowsRead = 0,  // the number of rows read from the csv file
      rowsWritten = 0; // the number of rows written to CouchdDb
    

    caller:

    .on('record', function(row,index){
      rowsRead++;
      save(row, collectionName, function(err){
        if(err){
          return done(err);
        }
        rowsWritten++;
        if(rowsRead===rowsWritten){ // check if we've written all records to CouchDb
          done();
        }
      }); 
    })
    

    save method:

    function save (data, collectionName, callback) {
      // document ID is concatenation of collectionName and ID 
      var docID = collectionName[0]+'_'+data.ID;
    
      // add some additional data
      data.type = collectionName;
    
      // insert data into couchdb
      db.insert(data, docID, callback);
    }