I'm trying to download a phar (PHP Archive) with my program in node.js. After some testing, I've discovered that the files are not the same when downloaded from a browser and from node.js.
I've tried a few things:
"binary" encoding
"utf8" encoding
Neither of these works.
Does anyone know which encoding I should use to download a phar?
My code:
exports.download = function(urlStr, dest, cb) { // urlStr is the url link of the phar, dest is the destination file, and cb is the callback.
var options = {
headers: {
"User-Agent": "PSM (Pocketmine Server Manager, https://psm.mcpe.fun) User Requester"
}
}
var data = "";
var url = new URL(urlStr);
options.hostname = url.hostname;
options.path = url.pathname;
var request = http.get(options, function(response) {
// check if response is success
if (response.statusCode == 302 || response.statusCode == 301) {
exports.download(response.headers["location"], dest, cb);
return;
}
response.on("data", function(chunk) {
data += chunk.toString("binary");
})
response.on('end', function() {
fs.writeFileSync(dest, data, "binary");
cb();
});
}).on('error', function(err) { // Handle errors
fs.unlink(dest); // Delete the file async. (But we don't check the result)
if (cb) cb(err.message);
});
};
Phar used (valid): https://psm.mcpe.fun/download/PSMCore/1.1.phar
I noticed a few issues in the original code:
The main issue is that the binary data is being implicitly converted to a UTF-8 string, which will not preserve the content. Keep the data in Buffer
form or just pipe the response to disk using streams instead of buffering the entire response in memory first.
When using asynchronous callbacks in node, the convention is to pass an actual Error
object as the first argument instead of a string. These objects often provide much more information that is either not contained in the message itself or not as easily parsed from it (e.g. stack trace, libuv error code, contextual information such as http URIs, file paths, hostnames, etc.).
The status code is not checked to be 200 before the response is saved to disk. You could end up saving HTML error pages (e.g. 400, 404, etc.) to disk instead of the file you're expecting. You should also check that the Content-Type header is what you expect, to further ensure the response is what you think it is.
An example with the first two items fixed is:
// Response currently being written to disk, if any; cleared once a failure
// has been handled.
var res;

/**
 * Shared failure handler for the request and the file stream.
 *
 * Drains the tracked response (when there is one), then removes the
 * destination file and reports the error to the callback. The callback is
 * cleared afterwards so it cannot be invoked a second time.
 *
 * @param {Error} failure The error to hand to the callback.
 */
function onError(failure) {
    if (res) {
        res.resume(); // Ensure response is drained
        res = null;
    }
    if (!cb) {
        return;
    }
    // The outcome of the deletion is intentionally ignored.
    fs.unlink(dest, () => {});
    cb(failure);
    cb = null;
}
// Issue the request and stream the body to `dest` as raw bytes, without any
// string conversion and without buffering the whole response in memory.
http.get(options, function(response) {
    // Check if response is success
    if (response.statusCode === 302 || response.statusCode === 301) {
        // Drain through `response`: `res` is only assigned further down for
        // non-redirect responses and is still undefined at this point.
        response.resume(); // Ensure response is drained
        exports.download(response.headers["location"], dest, cb);
        return;
    }
    res = response;
    var out = fs.createWriteStream(dest);
    out.on('error', onError);
    // A connection dropped mid-body never ends the pipe; report it and close
    // the partially written file instead of leaving the callback uncalled.
    response.on('error', function(err) {
        out.destroy();
        onError(err);
    });
    response.pipe(out).on('close', function() {
        // cb is nulled by onError, so a 'close' that follows a failure is ignored.
        if (cb) {
            cb();
            cb = null;
        }
    });
}).on('error', onError);