Search code examples
javascript, node.js, zip, zlib, unzip

How to download and unzip a zip file in memory in NodeJs?


I want to download a zip file from the internet and unzip it in memory without saving to a temporary file. How can I do this?

Here is what I tried:

// First attempt: download the archive with `request` and pass the response
// body straight to zlib.unzip, printing whatever comes back.
var url = 'http://bdn-ak.bloomberg.com/precanned/Comdty_Calendar_Spread_Option_20120428.txt.zip';

// NOTE: `fs` is required here but never used in this snippet.
var request = require('request'), fs = require('fs'), zlib = require('zlib');

  // No `encoding` option is passed, so `file` is presumably a decoded string
  // rather than a raw Buffer — TODO confirm against the request documentation.
  request.get(url, function(err, res, file) {
     if(err) throw err;
     // NOTE(review): zlib.unzip appears to target Gzip/Deflate-compressed data,
     // not the multi-entry ZIP archive container — likely why nothing is printed.
     zlib.unzip(file, function(err, txt) {
        if(err) throw err;
        console.log(txt.toString()); //outputs nothing
     });
  });

[EDIT] As suggested, I tried using the adm-zip library, but I still cannot make this work:

// Second attempt: treat the whole downloaded body as the compressed data of a
// single adm-zip ZipEntry and try to read it back. Relies on `request` and
// `url` from the previous snippet.
var ZipEntry = require('adm-zip/zipEntry');
request.get(url, function(err, res, zipFile) {
        if(err) throw err;
        var zip = new ZipEntry();
        // NOTE(review): the body is converted to a UTF-8 string and then wrapped
        // in a new Buffer; that round trip presumably corrupts the binary bytes,
        // and the body is a full archive rather than one entry's compressed
        // payload — TODO confirm.
        zip.setCompressedData(new Buffer(zipFile.toString('utf-8')));
        var text = zip.getData();
        console.log(text.toString()); // fails
    });

Solution

  • You need a library that can handle buffers. The latest version of adm-zip will do:

    npm install adm-zip
    

    My solution uses the http.get method, since it returns Buffer chunks.

    Code:

    // Download a zip archive over plain HTTP, keep it entirely in memory, and
    // print the number of entries plus the text of every entry whose name
    // contains "readme" (case-insensitive).
    var file_url = 'http://notepad-plus-plus.org/repository/7.x/7.6/npp.7.6.bin.x64.zip';

    var AdmZip = require('adm-zip');
    var http = require('http');

    http.get(file_url, (res) => {
      // http.get does not follow redirects, so anything other than a plain 200
      // (a redirect, an error page) must not be handed to the zip parser.
      if (res.statusCode !== 200) {
        console.error(`Download failed: HTTP ${res.statusCode}`);
        res.resume(); // discard the body so the connection is released
        return;
      }

      // Buffers cannot be resized, so collect the chunks and join them once
      // the response has ended.
      const chunks = [];

      res.on('data', (chunk) => {
        chunks.push(chunk);
      }).on('end', () => {
        const buf = Buffer.concat(chunks);

        const zip = new AdmZip(buf);
        const zipEntries = zip.getEntries();
        console.log(zipEntries.length);

        for (const entry of zipEntries) {
          // Case-insensitive so that names such as "README.txt" match as well.
          if (/readme/i.test(entry.entryName)) {
            console.log(zip.readAsText(entry));
          }
        }
      });
    }).on('error', (err) => {
      // Connection-level failures (DNS, refused connection, reset) arrive here.
      console.error(`Download failed: ${err.message}`);
    });
    

    The idea is to create an array of buffers and concatenate them into a new one at the end. This is due to the fact that buffers cannot be resized.

    Update

    This is a simpler solution that uses the request module to obtain the response in a buffer, by setting encoding: null in the options. It also follows redirects and resolves http/https automatically.

    // Download a zip archive with the request module, keep it in memory, and
    // print the number of entries plus the text of every entry whose name
    // contains "readme" (case-insensitive).
    var file_url = 'https://github.com/mihaifm/linq/releases/download/3.1.1/linq.js-3.1.1.zip';

    var AdmZip = require('adm-zip');
    var request = require('request');

    // encoding: null makes request deliver the body as a raw Buffer.
    request.get({url: file_url, encoding: null}, (err, res, body) => {
      // On a failed request there is no body to parse, so stop here.
      if (err) {
        console.error(`Download failed: ${err.message}`);
        return;
      }

      // An error page is not a zip archive; do not hand it to AdmZip.
      if (res.statusCode !== 200) {
        console.error(`Download failed: HTTP ${res.statusCode}`);
        return;
      }

      const zip = new AdmZip(body);
      const zipEntries = zip.getEntries();
      console.log(zipEntries.length);

      zipEntries.forEach((entry) => {
        if (/readme/i.test(entry.entryName)) {
          console.log(zip.readAsText(entry));
        }
      });
    });
    

    The body of the response is a buffer that can be passed directly to AdmZip, simplifying the whole process.