I've got a Node.js script that reads data from Excel files and sends it to a Node.js API using 'request'. The script works fine with a small amount of data, but when I test it with an Excel file of 60,000 lines, it crashes with this error:
FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - JavaScript heap out of memory
function fillWithFile(file, path, header, fileConfig) {
    let workbook = XLSX.readFile(path + '/' + file); // read file
    for (var sheetIterator = 0; sheetIterator < workbook.SheetNames.length; sheetIterator++) { // for each tab in the excel file
        let worksheetJson = XLSX.utils.sheet_to_json(workbook.Sheets[workbook.SheetNames[sheetIterator]]); // transform one tab of the excel file to JSON
        let config = fileConfig[workbook.SheetNames[sheetIterator]]; // get the config for the current tab
        let datasPromises = getPromises(worksheetJson, config, header);
        Promise.all(datasPromises).then(function (data) {
            var dataString = '{"data":' + JSON.stringify(data) + ',"options":{"purgeBefore":false}}';
            let options = {
                url: API_URL + '/' + config.service + 's/import',
                method: 'POST',
                headers: header,
                body: dataString
            }
            request.post(options, function (err, res, body) {
                if (err) throw err;
                console.log('Server responded with:', body);
            });
        });
    }
}
function getPromises(worksheetJson, config, header) {
    let datasPromises = [];
    let promises = [];
    for (let lineIterator = 0; lineIterator < worksheetJson.length; lineIterator++) { // for each line
        datasPromises.push(new Promise(function (resolve, reject) {
            for (let key in config.columns) { // for each column in config
                if (config.columns[key].service !== undefined) { // if a service exists we need to get the id of the object in this service.
                    promises.push(new Promise(function (resolve, reject) {
                        findChildren(worksheetJson[lineIterator], config.columns[key], header)
                            .then(function (res) {
                                resolve({ key: key, data: res.id });
                            });
                    }));
                }
                else {
                    promises.push(new Promise(function (resolve, reject) {
                        resolve({ key: key, data: worksheetJson[lineIterator][config.columns[key]] });
                    }));
                }
            }
            let tmpObj = {};
            Promise.all(promises).then(function (values) {
                for (var i = 0; i < values.length; i++) {
                    tmpObj[values[i].key] = values[i].data;
                }
                resolve(tmpObj);
            });
        }));
    }
    return datasPromises;
}
function findChildren(sheetData, config, header) { // get children with a GET request
    let dataObj = {};
    let searchParams;
    for (let key in config.columns) {
        dataObj[key] = sheetData[config.columns[key]];
        if (searchParams === undefined) searchParams = key + '=' + sheetData[config.columns[key]];
        else searchParams += '&' + key + '=' + sheetData[config.columns[key]];
    }
    var headers = {
        'Authorization': header.Authorization,
        'Accept': 'application/json, text/plain, */*',
    };
    var options = {
        url: API_URL + '/' + config.service + 's?' + searchParams,
        headers: headers
    };
    return new Promise(function (resolve, reject) {
        request(options, function (error, response, body) {
            if (!error && response.statusCode == 200) {
                try {
                    resolve(JSON.parse(body));
                } catch (e) {
                    reject(e);
                }
            }
            else {
                return reject(error);
            }
        });
    });
}
The script uses a huge amount of memory and eventually crashes. Does anyone have an idea how I can solve this? I think I have to find a way to force the promises to resolve immediately to avoid the memory overflow. Thanks for any help.
I think the problem is that you don't control parallelism: when everything runs 'at once', it causes memory issues.
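To make that concrete: the original getPromises starts a findChildren request for every line before any of them has finished, so with 60,000 lines there can be tens of thousands of pending HTTP requests, their response bodies and the wrapper promises alive in memory at the same time. A rough sketch of the difference (columnConfig here just stands in for one entry of config.columns that has a service):

// Uncontrolled: every request starts immediately; 60,000 pending promises pile up in memory.
const allAtOnce = Promise.all(worksheetJson.map(function (line) {
    return findChildren(line, columnConfig, header);
}));

// Controlled (fully serial): only one request is in flight at a time,
// so memory stays flat at the cost of throughput.
async function oneAtATime() {
    const results = [];
    for (const line of worksheetJson) {
        results.push(await findChildren(line, columnConfig, header));
    }
    return results;
}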
I've tried to rewrite the code using async/await; effectively, all operations now run serially:
async function fillWithFile(file, path, header, fileConfig) {
    let workbook = XLSX.readFile(path + '/' + file); // read file
    for (var sheetIterator = 0; sheetIterator < workbook.SheetNames.length; sheetIterator++) { // for each tab in the excel file
        const worksheetJson = XLSX.utils.sheet_to_json(workbook.Sheets[workbook.SheetNames[sheetIterator]]); // transform one tab of the excel file to JSON
        const config = fileConfig[workbook.SheetNames[sheetIterator]]; // get the config for the current tab
        const data = await getData(worksheetJson, config, header);
        const dataString = '{"data":' + JSON.stringify(data) + ',"options":{"purgeBefore":false}}';
        const options = {
            url: API_URL + '/' + config.service + 's/import',
            method: 'POST',
            headers: header,
            body: dataString
        };
        await new Promise((resolve, reject) => {
            request.post(options, function (err, res, body) {
                if (err) {
                    reject(err);
                    return;
                }
                console.log('Server responded with:', body);
                resolve(body);
            })
        });
    }
}
async function getData(worksheetJson, config, header) {
    const data = [];
    for (let lineIterator = 0; lineIterator < worksheetJson.length; lineIterator++) { // for each line
        const values = {};
        for (let key in config.columns) { // for each column in config
            if (config.columns[key].service !== undefined) { // if a service exists we need to get the id of the object in this service.
                const res = await findChildren(worksheetJson[lineIterator], config.columns[key], header);
                values[key] = res.id;
            }
            else {
                values[key] = worksheetJson[lineIterator][config.columns[key]];
            }
        }
        data.push(values);
    }
    return data;
}
function findChildren(sheetData, config, header) { // get children with a GET request
    let dataObj = {};
    let searchParams;
    for (let key in config.columns) {
        dataObj[key] = sheetData[config.columns[key]];
        if (searchParams === undefined) searchParams = key + '=' + sheetData[config.columns[key]];
        else searchParams += '&' + key + '=' + sheetData[config.columns[key]];
    }
    var headers = {
        'Authorization': header.Authorization,
        'Accept': 'application/json, text/plain, */*',
    };
    var options = {
        url: API_URL + '/' + config.service + 's?' + searchParams,
        headers: headers
    };
    return new Promise(function (resolve, reject) {
        request(options, function (error, response, body) {
            if (!error && response.statusCode === 200) {
                try {
                    resolve(JSON.parse(body));
                } catch (e) {
                    reject(e);
                }
            }
            else {
                // reject with a real Error so non-200 responses are not swallowed as undefined
                reject(error || new Error('Request failed with status ' + (response && response.statusCode)));
            }
        });
    });
}
You now run the import like this:
fillWithFile(..args).then(() => console.log('done'))
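Note that with the async version any failure rejects the promise returned by fillWithFile, so you probably also want a .catch(err => console.error(err)) on that call.

If the fully serial version turns out to be too slow for 60,000 lines, a middle ground is to keep a small, fixed number of requests in flight at a time. Here is a minimal sketch of such a helper; mapInBatches and the batch size of 10 are my own names and choices, not part of your code:

// Run fn over items, at most `limit` at a time, preserving the order of results.
async function mapInBatches(items, limit, fn) {
    const results = [];
    for (let i = 0; i < items.length; i += limit) {
        const batch = items.slice(i, i + limit);
        // Only `limit` promises exist at any moment, so memory stays bounded.
        const batchResults = await Promise.all(batch.map(fn));
        results.push(...batchResults);
    }
    return results;
}

// Hypothetical usage inside getData: resolve children for all lines, 10 at a time.
// const children = await mapInBatches(worksheetJson, 10, line => findChildren(line, columnConfig, header));

This keeps memory roughly proportional to the batch size instead of the whole sheet, while still running several requests in parallel.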