Search code examples
node.jspromisestack-overflow

how to avoid the "Allocation failed - JavaScript heap out of memory" during API import requests


I've got a nodejs script that read data in excels files and send it to a nodejs API with 'request'. this script works fine with a small amount of data. But when i test with an Excel with 60.000 lines, it breaks with error:
FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - JavaScript heap out of memory

function fillWithFile(file, path, header, fileConfig) { 
    let workbook = XLSX.readFile(path + '/' + file); // read file
    for (var sheetIterator = 0; sheetIterator < workbook.SheetNames.length; sheetIterator++) { // for each tab in excel file
        let worksheetJson = XLSX.utils.sheet_to_json(workbook.Sheets[workbook.SheetNames[sheetIterator]]); // transforme one tab of excel to JSON
        let config = fileConfig[workbook.SheetNames[sheetIterator]]; // get the config for the current tab

        let datasPromises = getPromises(worksheetJson, config, header);

        Promise.all(datasPromises).then(function (data) {
            var dataString = '{"data":' + JSON.stringify(data) + ',"options":{"purgeBefore":false}}';
            let options = {
                url: API_URL + '/' + config.service + 's/import',
                method: 'POST',
                headers: header,
                body: dataString
            }
            request.post(options, function (err, res, body) {
                if (err) throw err;
                console.log('Server responded with:', body);
            });
        });
    }
}

function getPromises(worksheetJson, config, header) {
        let datasPromises = [];
        let promises = [];
        for (let lineIterator = 0; lineIterator < worksheetJson.length; lineIterator++) { // for each line
            datasPromises.push(new Promise(function (resolve, reject) {
                for (let key in config.columns) { // for each column in config
                    if (config.columns[key].service !== undefined) { // if service exist we need to get id of the object in this service.
                        promises.push(new Promise(function (resolve, reject) {
                            findChildren(worksheetJson[lineIterator], config.columns[key], header)
                                .then(function (res) {
                                    resolve({ key: key, data: res.id });
                                });
                        }));
                    }
                    else {
                        promises.push(new Promise(function (resolve, reject) {
                            resolve({ key: key, data: worksheetJson[lineIterator][config.columns[key]] });
                        }));
                    }
                }
                let tmpObj = {};
                Promise.all(promises).then(function (values) {
                    for (var i = 0; i < values.length; i++) {
                        tmpObj[values[i].key] = values[i].data;
                    }
                    resolve(tmpObj);
                });
            }));
        }
        return datasPromises;
}

function findChildren(sheetData, config, header) { // get children with get request
    let dataObj = {};
    let searchParams;
    for (let key in config.columns) {
        dataObj[key] = sheetData[config.columns[key]];
        if (searchParams === undefined) searchParams = key + '=' + sheetData[config.columns[key]];
        else searchParams += '&' + key + '=' + sheetData[config.columns[key]];
    }

    var headers = {
        'Authorization': header.Authorization,
        'Accept': 'application/json, text/plain, */*',
    };
    var options = {
        url: API_URL + '/' + config.service + 's?' + searchParams,
        headers: headers
    };

    return new Promise(function (resolve, reject) {
        request(options, function (error, response, body) {
            if (!error && response.statusCode == 200) {
                try {
                        resolve(JSON.parse(body));
                } catch (e) {
                    reject(e);
                }
            }
            else {
                return reject(error);
            }
        });
    });
}

The script use a huge amount of memory and he ends up to crash ... Anyone have an idea how i can solve this ? I think, i have to find a way to force promises to resolve instantly for avoid the memory overflow. thanks for any help.


Solution

  • I think the problem is you don't control parallelism and when everything runs 'at once' - it causes memory issues.

    I've tried to rewrite the code using async/await - effectively all operations are now serial:

    async function fillWithFile(file, path, header, fileConfig) {
      let workbook = XLSX.readFile(path + '/' + file); // read file
      for (var sheetIterator = 0; sheetIterator < workbook.SheetNames.length; sheetIterator++) { // for each tab in excel file
        const worksheetJson = XLSX.utils.sheet_to_json(workbook.Sheets[workbook.SheetNames[sheetIterator]]); // transforme one tab of excel to JSON
        const config = fileConfig[workbook.SheetNames[sheetIterator]]; // get the config for the current tab
    
        const data = await getData(worksheetJson, config, header);
    
        const dataString = '{"data":' + JSON.stringify(data) + ',"options":{"purgeBefore":false}}';
        const options = {
          url: API_URL + '/' + config.service + 's/import',
          method: 'POST',
          headers: header,
          body: dataString
        };
    
        await new Promise((resolve, reject) => {
          request.post(options, function (err, res, body) {
            if (err) {
              reject(err);
              return;
            }
            console.log('Server responded with:', body);
            resolve(body);
          })
        });
      }
    }
    
    async function getData(worksheetJson, config, header) {
      const data = [];
      for (let lineIterator = 0; lineIterator < worksheetJson.length; lineIterator++) { // for each line
        const values = {};
        for (let key in config.columns) { // for each column in config
          if (config.columns[key].service !== undefined) { // if service exist we need to get id of the object in this service.
            const res = await findChildren(worksheetJson[lineIterator], config.columns[key], header);
            values[key] = res.id;
          }
          else {
            values[key] = worksheetJson[lineIterator][config.columns[key]];
          }
        }
        data.push(values);    
      }
      return data;
    }
    
    function findChildren(sheetData, config, header) { // get children with get request
      let dataObj = {};
      let searchParams;
      for (let key in config.columns) {
        dataObj[key] = sheetData[config.columns[key]];
        if (searchParams === undefined) searchParams = key + '=' + sheetData[config.columns[key]];
        else searchParams += '&' + key + '=' + sheetData[config.columns[key]];
      }
    
      var headers = {
        'Authorization': header.Authorization,
        'Accept': 'application/json, text/plain, */*',
      };
      var options = {
        url: API_URL + '/' + config.service + 's?' + searchParams,
        headers: headers
      };
    
      return new Promise(function (resolve, reject) {
        request(options, function (error, response, body) {
          if (!error && response.statusCode === 200) {
            try {
              resolve(JSON.parse(body));
            } catch (e) {
              reject(e);
            }
          }
          else {
            return reject(error);
          }
        });
      });
    }
    

    You now run import like this:

    fillWithFile(..args).then(() => console.log('done'))