Search code examples
node.js file-upload express checksum

How can I get the checksum of a (large) file using ExpressJS/Connect/Crypto?


I need to generate a checksum for a large file (.ISO) on file upload. I'm using ExpressJS/Connect to handle an upload request via POST (multipart).

  //express setup app.js
  // Mounts the body parser on /upload. The options below configure the
  // multipart (formidable) form that the route later reads as `req.form`.
  app.use('/upload',express.bodyParser({
      defer : true, //enable progress event
      keepExtensions : true,
      limit : app.locals.fileLimit,
      uploadDir : path.join( __dirname, '..', 'tmp'),
      // formidable only computes a checksum when `hash` names a digest
      // algorithm ('sha1' or 'md5'). A boolean `true` is not an algorithm
      // name, so no hash is produced. With a valid name the hex digest is
      // exposed as `file.hash` on every uploaded file.
      hash : 'sha1'
  }));

  //express route routes.js
  // Handles a deferred multipart upload: logs progress, moves each finished
  // file into the upload directory under its original (sanitised) name, and
  // renders a summary page once the whole form has been parsed.
  app.post("/upload", function(req, res){

      var fs     = require('fs');
      var path   = require('path');
      var format = require('util').format;

      // formidable expects the NAME of a digest algorithm ('sha1' or 'md5');
      // `true` is not a valid value, so no checksum was ever computed.
      // Once set, the hex digest is available as `file.hash` in the 'file'
      // event below.
      // NOTE(review): this must run before the first file part is parsed --
      // confirm no asynchronous middleware sits between bodyParser and this route.
      req.form.hash = 'sha1';

      // formidable passes (name, file) to 'fileBegin', not an error object.
      req.form.on('fileBegin', function(name, file) {
         log.info("Starting new upload...");
      });

      req.form.on('file', function(name, file) {
          // file.name comes from the client; keep only its last path segment
          // so a crafted name such as "../../x" cannot escape uploadDir.
          var target = path.join(req.form.uploadDir, path.basename(file.name));

          // fs.rename is asynchronous: without a callback a failed move went
          // unnoticed (and current Node versions reject a missing callback).
          fs.rename(file.path, target, function(err) {
              if (err) {
                  log.error('error', err);
              }
          });
        console.info(file);
      });

      req.form.on('error', function(err) {
         log.error('error',err);
         req.resume();
         // Answer the request; otherwise the client waits until it times out.
         if (!res.headersSent) {
             res.status(500).send('Upload failed');
         }
      });



      req.form.on('field', function(field, value) {
         log.info("Passed Field (%s=%s)...",field,value);
      });

      // formidable does not emit a 'part' event -- parts are delivered
      // through the overridable `form.onPart` hook -- so this listener never
      // runs. It is kept unchanged from the original for reference; the
      // checksum is obtained through `form.hash` / `file.hash` instead.
      req.form.on('part', function(part){
        console.log('PART upload %s %s', part.name, part.filename);
        var out = fs.createWriteStream('../tmpstrm/' + part.filename);
        part.pipe(out);
      });


      req.form.on('progress', function(bytesReceived, bytesExpected) {
          console.info( "PROGRESS :"+ ((bytesReceived / bytesExpected)*100) + "% uploaded");
      });

      req.form.on('end', function() {
          var upload = req.files && req.files.upload;

          // No file field named "upload" was submitted: reply instead of
          // throwing a TypeError on the property reads below.
          if (!upload) {
              res.status(400).send('No file uploaded');
              return;
          }

          // NOTE: upload.path is the temporary path the parser wrote to; the
          // 'file' handler above renames that file asynchronously, so the
          // path reported here may no longer exist.
          var fileInfo = format('\nuploaded %s (%d Kb) to %s as %s'
            , upload.name
            , upload.size / 1024 | 0
            , upload.path
            , req.body.title);

          res.locals.fileInfo = fileInfo; //renders info in jade

          res.render('media/index');
      });

  });

I couldn't find much information on my particular question; it's probably not a typical use case. But the one question I did find had a brief example of how to update the hash on a data event:

/**
 * Computes the SHA-1 checksum of a stream chunk by chunk, so the whole
 * file never has to be held in memory.
 *
 * Assumes `crypto` (Node's built-in module) is already in scope.
 *
 * @param {Object} data - incoming file data: anything that emits 'data',
 *   'end' and 'error' events (for example a readable stream).
 * @param {Function} callback - called exactly once, as callback(err) on a
 *   stream error or callback(null, sha) with the hex digest on 'end'.
 */
function checksumStream(data, callback) {
  var hash = crypto.createHash('sha1');
  var finished = false;

  // Guarantees a single callback even if the source emits both 'error' and 'end'.
  function finish(err, sha) {
    if (finished) {
      return;
    }
    finished = true;
    callback(err, sha);
  }

  data.on('data', function(chunk){
    hash.update(chunk);
  });
  data.on('end', function(){
    if (finished) {
      return;
    }
    var sha = hash.digest('hex');

    // Do something with it
    finish(null, sha);
  });
  data.on('error', function(err){
    finish(err);
  });
}

I'm new to streaming, chunks, etc, so some guidance here is appreciated.


Solution

  • There's some example code you might find helpful in my writers-digest npm module source code:

    https://github.com/focusaurus/writers-digest/blob/ab52ffca6e989ca657dc449e05ff258c428fa3af/index.js#L38

    But basically you have 2 approaches:

    1. Allow the bodyParser to stream the HTTP request body to the filesystem, and then subsequently compute the checksum by streaming data from the filesystem (using the writers-digest code as an example).

    2. You can try to compute the checksum while the actual HTTP request body is arriving by binding event handler functions to the `req` object's `data` and `end` events. You should be able to do this while the bodyParser also streams it to disk. Just make sure the handlers are bound early enough that you don't miss any chunks.