Search code examples
javascript, node.js, node-webkit, jszip

JSZip Memory Issue


I am experiencing high memory consumption in my Node.js app: when I load ~100MB zip files one after the other, it keeps them in memory as a "NodeBufferReader". The library I am using is called JSZip and can be found here: https://stuk.github.io/jszip/

If I access the same zip file twice, memory usage doesn't increase, but for every 'extra' .zip file I access, memory increases by approximately the size of that .zip file. The files I am accessing are all around 100MB or larger, so, as you can imagine, this has the potential to get rather large rather quickly.

The Node.js application is a websocket server that reads files from within .zip files and returns them back to the requestor as base64 data. The function in question is here:

/**
 * Handle a websocket request for a file stored inside a .zip archive and
 * send it back to the requesting connection as a base64 data URI.
 *
 * The requested path (args.custom.file) is split at the first ".zip":
 * everything up to and including ".zip" is the archive path, everything
 * after the following separator character is the path inside the archive.
 *
 * @param {Object} args - Request payload; reads args.custom.file,
 *     args.custom.tag2, args.custom.tag3 and args.tag.
 * @param {*} connection_id - Key into the `connections` map identifying
 *     the connection that gets the response.
 */
function handleFileRequest(args, connection_id) {
    var zipIndex = 0,
        pathLen = 0,
        zip_file = "",
        zip_subdir = "";
    // NOTE: this try/catch only covers the synchronous code below. The
    // fs.readFile callback runs later, so exceptions thrown inside it are
    // not caught here.
    try {
        if (args.custom.file.indexOf(".zip") > -1) {
            // We have a .zip directory!
            // Index just past the ".zip" suffix (4 characters).
            zipIndex = args.custom.file.indexOf(".zip") + 4;
            pathLen = args.custom.file.length;

            zip_file = args.custom.file.substring(0, zipIndex);
            // +1 skips the separator character that follows ".zip".
            zip_subdir = args.custom.file.substring(zipIndex + 1, pathLen);

            fs.readFile(zip_file, function (err, data) {
                if (!err) {
                    // NOTE: zipObj is not declared in this function, so every
                    // request loads into the same outer JSZip instance. load()
                    // adds to the instance's content instead of replacing it,
                    // which is why each new archive stays in memory.
                    zipObj.load(data);
                    if (zipObj.file(zip_subdir)) {
                        var binary = zipObj.file(zip_subdir).asBinary();
                        var base64data = btoa(binary);
                        // The text after the last "." selects the MIME type.
                        var extension = args.custom.file.split('.').pop();
                        var b64Header = "data:" + MIME[extension] + ";base64,";
                        var tag2 = args.custom.tag2 || "unset";
                        var tag3 = args.custom.tag3 || "unset";

                        var rargs = {
                            action: "getFile",
                            tag: args.tag,
                            dialogName: connections[connection_id].dialogName,
                            custom: {
                                file: b64Header + base64data,
                                tag2: tag2,
                                tag3: tag3
                            }
                        };
                        connections[connection_id].sendUTF(JSON.stringify(rargs));

                        // Dropping these local references does not release the
                        // archive data still held by the shared zipObj.
                        rargs = null;
                        binary = null;
                        base64data = null;
                    } else {
                        serverLog(connection_id, "Requested file doesn't exist");
                    }
                } else {
                    serverLog(connection_id, "There was an error retrieving the zip file data");
                }
            });

        } else {
            // File isn't a .zip
        }
    } catch (e) {
        serverLog(connection_id, e);
    }
}

memory problem

Any help would be much appreciated in getting rid of this problem - Thanks!

Working Code Example

/**
 * Handle a websocket request for a file stored inside a .zip archive and
 * send it back to the requesting connection as a base64 data URI.
 *
 * The requested path (args.custom.file) is split at the first ".zip":
 * everything up to and including ".zip" is the archive path, everything
 * after the following separator character is the path inside the archive.
 * Requests whose path contains no ".zip" are ignored.
 *
 * A new JSZip instance is created for every request so that no archive
 * data outlives the request that loaded it.
 *
 * Failures (unreadable archive, missing entry, closed connection, or any
 * exception while unzipping/sending) are reported through serverLog and
 * never thrown to the caller.
 *
 * @param {Object} args - Request payload; reads args.custom.file,
 *     args.custom.tag2, args.custom.tag3 and args.tag.
 * @param {*} connection_id - Key into the `connections` map identifying
 *     the connection that gets the response.
 */
function handleFileRequest(args, connection_id) {
    var zipIndex = 0,
        f = "",
        d = "";
    try {
        zipIndex = args.custom.file.indexOf(".zip");
        if (zipIndex === -1) {
            // Non-Zip file
            return;
        }
        // Step past the ".zip" suffix itself (4 characters).
        zipIndex += 4;

        f = args.custom.file.substring(0, zipIndex);
        // +1 skips the separator character that follows ".zip".
        d = args.custom.file.substring(zipIndex + 1);

        fs.readFile(f, function (err, data) {
            var connection, zipObj, entry, extension, rargs;

            // This callback runs after handleFileRequest has returned, so the
            // outer try/catch cannot see anything thrown here. Without its own
            // guard, a corrupt archive would surface as an uncaught exception.
            try {
                if (err) {
                    serverLog(connection_id, "There was an error retrieving the zip file data");
                    return;
                }

                // The client may have disconnected while the file was being
                // read; check before paying for the unzip.
                connection = connections[connection_id];
                if (!connection) {
                    serverLog(connection_id, "Connection closed before the file could be sent");
                    return;
                }

                // Fresh instance per request: load() adds to an instance's
                // content, so a shared instance would retain every archive.
                zipObj = new JSZip();
                zipObj.load(data);

                entry = zipObj.file(d);
                if (!entry) {
                    serverLog(connection_id, "Requested file doesn't exist");
                    return;
                }

                // The text after the last "." selects the MIME type.
                extension = args.custom.file.split('.').pop();
                rargs = {
                    action: "getFile",
                    tag: args.tag,
                    dialogName: connection.dialogName,
                    custom: {
                        file: "data:" + MIME[extension] + ";base64," + btoa(entry.asBinary()),
                        tag2: args.custom.tag2 || "unset",
                        tag3: args.custom.tag3 || "unset"
                    }
                };
                connection.sendUTF(JSON.stringify(rargs));
            } catch (e) {
                serverLog(connection_id, e);
            }
        });
    } catch (e) {
        serverLog(connection_id, e);
    }
}

Solution

  • If you use the same JSZip instance to load each and every file, you will keep everything in memory: the load method adds to the existing content instead of replacing it.

    Try using a new JSZip instance each time:

    // Create a fresh instance for each archive so that content loaded for
    // earlier archives is not kept alive by this one.
    var zipObj = new JSZip();
    zipObj.load(data);
    // or var zipObj = new JSZip(data);