I am experiencing high memory consumption in my Node.js app: when loading ~100MB zip files one after the other, it keeps each of them in memory as a "NodeBufferReader". The library I am using is called JSZip and can be found here: https://stuk.github.io/jszip/
If I access the same zip file twice, memory usage doesn't increase, but for every 'extra' .zip file I access, memory increases by approximately the size of that .zip file. The files I am accessing are all around 100MB or larger, so as you can imagine this has the potential to get rather large, rather quickly.
The Node.js application is a websocket server that reads files from within .zip files and returns them back to the requestor as base64 data. The function in question is here:
/**
 * Handle a websocket "getFile" request for a file stored inside a .zip archive.
 *
 * args.custom.file is split at the first ".zip": everything up to and
 * including ".zip" is the archive path, and everything after the following
 * separator character is the entry path inside the archive. The entry is
 * base64-encoded and sent back as a data: URI on connections[connection_id].
 *
 * NOTE(review): zipObj is not declared anywhere in this function, so it comes
 * from an enclosing scope and the same instance receives every load() call.
 * If JSZip's load() adds to the instance instead of replacing its contents,
 * each distinct archive stays referenced - which would match the reported
 * memory growth. Confirm against the JSZip documentation.
 *
 * @param {Object} args - Request payload; reads args.tag, args.custom.file,
 *     args.custom.tag2 and args.custom.tag3.
 * @param {*} connection_id - Key into the connections table; also passed to
 *     serverLog.
 */
function handleFileRequest(args, connection_id) {
var zipIndex = 0,
pathLen = 0,
zip_file = "",
zip_subdir = "";
// NOTE(review): this try/catch wraps the fs.readFile call itself; if the
// callback runs later (as with Node's asynchronous fs.readFile), exceptions
// thrown inside the callback are not caught here - confirm.
try {
if (args.custom.file.indexOf(".zip") > -1) {
// We have a .zip directory!
// zipIndex points just past the ".zip" extension (4 characters).
zipIndex = args.custom.file.indexOf(".zip") + 4;
pathLen = args.custom.file.length;
zip_file = args.custom.file.substring(0, zipIndex);
// The +1 skips the single separator character between archive and entry path.
zip_subdir = args.custom.file.substring(zipIndex + 1, pathLen);
fs.readFile(zip_file, function (err, data) {
if (!err) {
// Loads into the shared outer zipObj (see the note in the header comment).
zipObj.load(data);
if (zipObj.file(zip_subdir)) {
var binary = zipObj.file(zip_subdir).asBinary();
var base64data = btoa(binary);
// The extension of the requested path selects the MIME type for the data: URI.
var extension = args.custom.file.split('.').pop();
var b64Header = "data:" + MIME[extension] + ";base64,";
// tag2/tag3 are echoed back, defaulting to "unset" when falsy.
var tag2 = args.custom.tag2 || "unset";
var tag3 = args.custom.tag3 || "unset";
var rargs = {
action: "getFile",
tag: args.tag,
dialogName: connections[connection_id].dialogName,
custom: {
file: b64Header + base64data,
tag2: tag2,
tag3: tag3
}
};
connections[connection_id].sendUTF(JSON.stringify(rargs));
// These only clear the callback's local references; they do not touch
// anything held by zipObj.
rargs = null;
binary = null;
base64data = null;
} else {
serverLog(connection_id, "Requested file doesn't exist");
}
} else {
serverLog(connection_id, "There was an error retrieving the zip file data");
}
});
} else {
// File isn't a .zip
}
} catch (e) {
serverLog(connection_id, e);
}
}
Any help would be much appreciated in getting rid of this problem - Thanks!
Working Code Example
/**
 * Handle a websocket "getFile" request for a file stored inside a .zip archive.
 *
 * args.custom.file is split at the first ".zip": everything up to and
 * including ".zip" is the archive path on disk, and everything after the
 * following separator character is the entry path inside the archive. The
 * entry is base64-encoded and sent back on connections[connection_id] as a
 * JSON message whose custom.file field is a data: URI.
 *
 * A fresh JSZip instance is created for every request, so no archive data
 * outlives the request that loaded it.
 *
 * Failures are reported through serverLog and never thrown to the caller,
 * including failures that happen inside the fs.readFile callback.
 *
 * @param {Object} args - Request payload.
 * @param {*} args.tag - Echoed back unchanged in the response.
 * @param {Object} args.custom - Request details.
 * @param {string} args.custom.file - "<archive>.zip<sep><entry path>".
 * @param {*} [args.custom.tag2] - Echoed back; "unset" when falsy.
 * @param {*} [args.custom.tag3] - Echoed back; "unset" when falsy.
 * @param {*} connection_id - Key into the connections table; also passed to
 *     serverLog.
 */
function handleFileRequest(args, connection_id) {
    var ZIP_EXT = ".zip";
    var requestedPath,
        zipEnd,
        zipPath,
        entryPath;

    try {
        requestedPath = args.custom.file;
        zipEnd = requestedPath.indexOf(ZIP_EXT);
        if (zipEnd === -1) {
            // Non-Zip file
            return;
        }
        zipEnd += ZIP_EXT.length;
        zipPath = requestedPath.substring(0, zipEnd);
        // The +1 skips the separator between the archive and the entry path.
        entryPath = requestedPath.substring(zipEnd + 1);

        fs.readFile(zipPath, function (err, data) {
            var zipObj,
                entry,
                extension,
                mimeType,
                rargs;

            if (err) {
                serverLog(connection_id, "There was an error retrieving the zip file data");
                return;
            }

            // The callback runs outside the enclosing try/catch, so it needs
            // its own: a corrupt archive or a connection that has gone away
            // would otherwise surface as an uncaught exception.
            try {
                // One instance per request: nothing accumulates across archives.
                zipObj = new JSZip();
                zipObj.load(data);

                entry = zipObj.file(entryPath);
                if (!entry) {
                    serverLog(connection_id, "Requested file doesn't exist");
                    return;
                }

                extension = requestedPath.split('.').pop();
                // Unknown extensions fall back to a generic binary type
                // instead of producing "data:undefined;base64,".
                mimeType = MIME[extension] || "application/octet-stream";

                rargs = {
                    action: "getFile",
                    tag: args.tag,
                    dialogName: connections[connection_id].dialogName,
                    custom: {
                        file: "data:" + mimeType + ";base64," + btoa(entry.asBinary()),
                        tag2: args.custom.tag2 || "unset",
                        tag3: args.custom.tag3 || "unset"
                    }
                };
                connections[connection_id].sendUTF(JSON.stringify(rargs));
            } catch (e) {
                serverLog(connection_id, e);
            }
        });
    } catch (e) {
        serverLog(connection_id, e);
    }
}
If you use the same JSZip instance to load each and every file, you will keep everything in memory: the load
method doesn't replace the existing content; it adds to it.
Try using a new JSZip instance each time:
// Create the instance per request (inside the read callback) rather than
// sharing one across requests, so each archive can be released afterwards.
var zipObj = new JSZip();
zipObj.load(data);
// or var zipObj = new JSZip(data);