I'm trying to implement a file upload in Node.js 4.x using Express with busboy. I'm already able to upload files and store them in Azure Blob Storage.
Now I'd like to verify the file type before storing the file in Azure, and reject any file that is not valid.
I'd like to do the validation using magic numbers. I found
const fileType = require('file-type');
which determines the file type for me.
Now I'm trying to get this to work as efficiently as possible, but this is where I'm struggling: I want to pipe the file stream directly to Azure. But before that, I need to read the first 5 bytes from the stream into a buffer, which is then processed by file-type.
Reading from the stream and then piping it to Azure surely does not work. After some research I found a solution that pipes the file into 2 PassThrough streams. But now I'm struggling to handle those 2 streams correctly.
const fileType = require('file-type');
const pass = require('stream').PassThrough;
//...
// Handles each uploaded file: sniffs its type from the first bytes and, if it is
// an allowed image type, streams it to blob storage.
req.busboy.on('file', function (fieldname, file, filename) {
console.log("Uploading: " + filename);
// Two PassThrough streams are fed from the same upload stream:
// `b` is only used to sniff the file type, `c` is what later gets piped to the blob.
var b = new pass;
var c = new pass;
file.pipe(b);
file.pipe(c);
// null = type not sniffed yet; false = sniffed, but fileType() returned a falsy result.
var type = null;
b.on('readable', function() {
b.pause();
if(type === null) {
// Take the first 5 bytes for type detection.
// NOTE(review): read(5) can presumably return null when fewer than 5 bytes are buffered — confirm.
var chunk = b.read(5);
type = fileType(chunk) || false;
// NOTE(review): `b` is ended here while `file` is still piped into it; this looks like
// the likely source of the "write after end" error — confirm.
b.end();
}
});
// Runs once `b` has finished; only jpg/png/gif are forwarded to blob storage.
b.on('finish', function() {
if(type && ['jpg', 'png', 'gif'].indexOf(type.ext) !== -1) {
var blobStream = blobSvc.createWriteStreamToBlockBlob(storageName,
blobName,
function (error) {
if (error) console.log('blob upload error', error);
else console.log('blob upload complete')
});
c.pipe(blobStream);
}
else {
// `type` here is whatever fileType() returned (or false/null), not an extension string.
// NOTE(review): nothing reads from `c` (or `file`) in this branch; presumably this is
// what stalls the request until a timeout — confirm.
console.error("Rejected file of type " + type);
}
});
});
This solution sometimes works, and sometimes fails with a "write after end" error. Also, I think the streams are not closed properly, because normally, after a request, Express logs something like this to the console:
POST /path - - ms - -
But this log message now appears about 30-60 seconds after "blob upload complete", probably due to some timeout.
Any idea how to fix this?
You don't need to add additional streams into the mix. Just unshift()
the consumed portion back onto the stream. For example:
const fileType = require('file-type');
// Handles each uploaded file: sniffs the type from its first 5 bytes and either
// streams the whole file (sniffed bytes included) to blob storage or rejects it.
req.busboy.on('file', function (fieldname, file, filename) {
  // Extensions (as reported by file-type) that may be stored.
  var allowedExts = ['jpg', 'png', 'gif'];

  /**
   * Tries to read the first 5 bytes of the upload. If they are not buffered
   * yet, it re-arms itself on the next 'readable' event; otherwise it decides
   * whether to upload or reject the file.
   */
  function readFirstBytes() {
    var chunk = file.read(5);
    if (!chunk) {
      // Not enough data buffered yet; try again when more arrives.
      return file.once('readable', readFirstBytes);
    }

    // fileType() yields a falsy result for data it does not recognise, so the
    // result must be guarded before touching `.ext`; otherwise an unknown file
    // would throw instead of being rejected.
    var type = fileType(chunk);
    var ext = type ? type.ext : null;

    if (allowedExts.indexOf(ext) !== -1) {
      const blobStream = blobSvc.createWriteStreamToBlockBlob(
        storageName,
        blobName,
        function (error) {
          if (error)
            console.log('blob upload error', error);
          else
            console.log('blob upload complete');
        }
      );
      // Put the sniffed bytes back so the blob receives the complete file.
      file.unshift(chunk);
      file.pipe(blobStream);
    } else {
      // Log the extension itself; concatenating the result object would not print it.
      console.error('Rejected file of type ' + (ext || 'unknown'));
      file.resume(); // Drain file stream to continue processing form
    }
  }

  readFirstBytes();
});