Search code examples
javascriptnode.jsjsonxmlbluebird

Bulk convert XML to JSON with particular output


How do I convert approximately 3000 XML files into JSON files using node?

I've been able to get a script below to a single XML to JSON file in the format that I want, and I've been attempting to promisify the script using bluebird, but I keep getting errors. I've been able to get the script below to list the filenames, but then I get the error "Unhandled rejection Error: ENOENT: no such file or directory, open 'journal-article-10.2307_357359.xml'"

var Promise = require('bluebird');
var fs = require('fs');
var convert = require('xml-js');

fs.readdirAsync = function(dirname) {
    return new Promise(function(resolve, reject) {
        fs.readdir(dirname, function(err, filenames){
            if (err)
                reject(err);
            else
                resolve(filenames);
        });
    });
};

fs.readFileAsync = function(filename, enc) {
    return new Promise(function(resolve, reject) {
        fs.readFile(filename, enc, function(err, data){
            if (err)
                reject(err);
            else
                resolve(data);
        });
    });
};

function getFile(filename) {
    return fs.readFileAsync(filename, 'utf8');
}

fs.readdirAsync('./metadata/').then(function (filenames){
    console.log(filenames);
    return Promise.all(filenames.map(getFile));
}).then(function (files){
  files.forEach(function(files){
    function nativeType(value) {
      var nValue = Number(value);
      if (!isNaN(nValue)) {
        return nValue;
      }
      var bValue = value.toLowerCase();
      if (bValue === 'true') {
        return true;
      } else if (bValue === 'false') {
        return false;
      }
      return value;
    }
    var removeJsonTextAttribute = function(value, parentElement) {
      try {
        var keyNo = Object.keys(parentElement._parent).length;
        var keyName = Object.keys(parentElement._parent)[keyNo - 1];
        parentElement._parent[keyName] = nativeType(value);
      } catch (e) {}
    };
    var options = {
      compact: true,
      trim: true,
      ignoreDeclaration: true,
      ignoreInstruction: true,
      ignoreAttributes: true,
      ignoreComment: true,
      ignoreCdata: true,
      ignoreDoctype: true,
      textFn: removeJsonTextAttribute,
      spaces: 2
    };
    fs.writeFile("./json/" + fileaname + ".json", convert.xml2json(options));
  });
});

I would like to be able to convert the entire folder of XML files to JSON (to upload to couchDB).


Solution

  • ENOENT is a standard POSIX error code that means the path to the file is not found. You've tried to open the name of a file or directory that doesn't exist. In this case, fs.readdir returns names that are not fully qualified file names, so you'll need to prefix them with the path that you gave it, specifically: './metadata/'. The error message that you're seeing tells you the file that was opened: journal-article-10.2307_357359.xml, but in this case, you probably want to open ./metadata/journal-article-10.2307_357359.xml.

    You can see this with the following simple example:

    # Create a dummy directory named `garbage` that contains only 3 entries
    $ mkdir -p garbage/{foo,bar,baz}
    # Run `node` interactively
    $ node
    > const fs = require('fs');
    undefined
    > fs.readdirSync('./garbage')
    [ 'bar', 'baz', 'foo' ]
    

    It doesn't make sense to open 'bar' since that doesn't exist. You'll need to open './garbage/bar' for it to work correctly.