I'm using the AWS S3 getObject method in a Node application to download a zip file, and then using child_process.exec to call unzip on it:
var file = fs.createWriteStream(file_path);
s3.getObject(params)
  .on('httpData', function(chunk) {
    process.stdout.write(".");
    file.write(chunk);
  })
  .on('httpDone', function() {
    file.end();
    console.log('Download Complete');
    self.emit('downloadComplete');
  })
  .send();
On the downloadComplete event, this code is called:
var exec = require('child_process').exec;
exec('unzip -o -qq ' + src + ' -d ' + dest, function (error, stdout, stderr) {
  callback(stderr);
});
The exec call comes back with this error:
End-of-central-directory signature not found. Either this file is not
a zipfile, or it constitutes one disk of a multi-part archive. In the
latter case the central directory and zipfile comment will be found on
the last disk(s) of this archive.
However, if I set a short timeout before I try the unzip, i.e.:
setTimeout(function() {
  self.emit('downloadComplete');
}, 100);
It works. Is there a bug in the AWS Node library, or am I using the wrong completion event?
You should instead emit your downloadComplete event in a finish event handler on the file stream. The SDK's httpDone event fires when the server has finished sending data, but at that point some of it may still be buffered in the write stream; the stream's finish event fires only after file.end() has been called and everything has been flushed to disk, which is why your 100 ms timeout happens to paper over the race:
var file = fs.createWriteStream(file_path);
file.on('finish', function() {
  self.emit('downloadComplete');
});
s3.getObject(params)
  .on('httpData', function(chunk) {
    process.stdout.write(".");
    file.write(chunk);
  })
  .on('httpDone', function() {
    file.end();
    console.log('Download Complete');
  })
  .send();
On an unrelated note, you should probably also use normal streams so that backpressure can do its thing when the disk can't keep up. For example:
var file = fs.createWriteStream(file_path);
s3.getObject(params)
  .createReadStream()
  .pipe(file)
  .on('finish', function() {
    self.emit('downloadComplete');
  });
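For a sense of what pipe() is doing for you there: hand-rolled backpressure means checking the boolean that write() returns and pausing the source until the destination drains. A rough sketch (generic Node streams, not SDK-specific; copyWithBackpressure is just an illustrative name):

function copyWithBackpressure(source, file, done) {
  source.on('data', function(chunk) {
    // write() returns false once the stream's internal buffer is full
    if (!file.write(chunk)) {
      source.pause();
      // resume reading only after the file stream has flushed its buffer
      file.once('drain', function() {
        source.resume();
      });
    }
  });
  source.on('end', function() {
    file.end();
  });
  file.on('finish', done);
}

pipe() implements exactly this pause/resume bookkeeping for you, which is why the piped version above stays so simple.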
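One caveat worth adding: pipe() does not forward errors from the source stream to its destination, so a failed S3 request (wrong key, dropped connection) would otherwise go unnoticed. A minimal sketch covering both failure paths (the downloadError event name here is just an illustration):

var file = fs.createWriteStream(file_path);
var download = s3.getObject(params).createReadStream();

// errors are not propagated by pipe(), so listen on both streams
download.on('error', function(err) {
  self.emit('downloadError', err); // e.g. NoSuchKey or a network failure
});
file.on('error', function(err) {
  self.emit('downloadError', err); // e.g. disk full or permissions
});

download.pipe(file).on('finish', function() {
  self.emit('downloadComplete');
});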