I would like to read a csv file and upload each row to a couchdb using a grunt task. At this point I am not yet doing any database validation such as checking if the record already exists but will have to do that at some point also.
Currently this is what I am doing and the problem is only the first 65 rows, of the first sub task named people
is being uploaded to couchdb.
I know this has something to do with asynchronous execution but just can't work out how to do this
Gruntils.js
csv2couch: {
people: {
db: 'http://localhost:5984/db',
collectionName: 'person',
src:['./data/schema3/people.csv']
},
organisms: {
db: '<%= qmconfig.COUCHDBURL %>',
collectionName: 'organism',
src:['./data/schema3/organisms.csv']
}
}
csv2couch.js
'use strict';
var nanolib = require('nano'),
csv = require('csv'),
urls = require('url'),
fs = require('fs');
module.exports = function(grunt) {
grunt.registerMultiTask('csv2couch', 'Parse csv file and upload data to couchdb.', function() {
var done, parts, dbname, _this, collectionName;
_this = this;
done = this.async();
parts = urls.parse(this.data.db);
dbname = parts.pathname.replace(/^\//, '');
collectionName = this.data.collectionName;
// Merge task-specific and/or target-specific options with these defaults.
var options = this.options({});
// couchdb connection
try {
var nano = nanolib(parts.protocol + '//' + parts.host);
} catch (e) {
grunt.warn(e);
done(e, null);
}
// database connection
var db = nano.use(dbname);
// process each source csv file
this.filesSrc.forEach(function(f) {
console.log('source file:', f);
csv()
.from.path(f, {
columns:true,
delimeter:',',
quote:'"'
})
.on('record', function(row,index){
console.log('#'+index, row);
save(row, collectionName);
})
.on('end', function(count){
console.log('Number of lines: '+count);
done();
})
.on('error', function(error){
console.log(error.message);
done(error);
});
});
function save (data, collectionName) {
// document ID is concatenation of collectionName and ID
var docID = collectionName[0]+'_'+data.ID;
// add some additional data
data.type = collectionName;
// insert data into couchdb
db.insert(data, docID, function(err, body, header) {
if (err) {
console.log('[db.insert] ', err.message);
return;
}
});
}
});
};
You're right, the async code is incorrect. The CSV file is being read to the end before all your records are saved. You need to call done only when your last record has been saved.
Your save method needs to take a callback
var rowsRead = 0, // the number of rows read from the csv file
rowsWritten = 0; // the number of rows written to CouchdDb
caller:
.on('record', function(row,index){
rowsRead++;
save(row, collectionName, function(err){
if(err){
return done(err);
}
rowsWritten++;
if(rowsRead===rowsWritten){ // check if we've written all records to CouchDb
done();
}
});
})
save method:
function save (data, collectionName, callback) {
// document ID is concatenation of collectionName and ID
var docID = collectionName[0]+'_'+data.ID;
// add some additional data
data.type = collectionName;
// insert data into couchdb
db.insert(data, docID, callback);
}