I know that an old-school for loop works in the traditional way: it waits for each await to finish before moving on to the next iteration.
But in my use case, I need to read a file from local disk or S3 and process it line by line, and for each line I need to call an external API.
Generally I use await inside the loop because everything runs inside a Lambda and I don't want to exhaust its memory by running the calls in parallel.
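That is, with a plain array the behaviour I want looks like this (apiCall is just a placeholder for the external request):

for (const request of requests) {
  // the next iteration starts only after this call resolves
  const result = await apiCall(request);
}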
Here I am reading the file using stream.on(), and in order to use await inside the handler, I need to mark it async, like so:
stream.on('readable', async () => {
  while ((data = stream.read()) !== null) {
    console.log('line');
    const requests = getRequests(); // sync code, no problems
    for (let i = 0; i < requests.length; i++) {
      const result = await apiCall(requests[i]);
      console.log('result from api');
      const finalResult = await anotherApiCall(result.data);
    }
  }
});
This works, but the order in which the lines are processed is not guaranteed. I need them all handled sequentially. Any help?
async function processSOIFileLocal (options, params) {
  console.log('Process SOI file');
  // dependencies and the file key come in via options/params, as in the update below
  const { ERR_DIR, fs, xml2js, LOST_URL, byline, event } = options;
  const { key } = params;
  let totalRecordsCount = 0;
  let totalRequestsCount = 0;
  let failedRequestsCount = 0;
  let successRequestsCount = 0;
  let dataObject = {};
  const readStream = byline.createStream(fs.createReadStream(key));
  readStream.setEncoding('utf8');
const pattern = /^UHL\s|^UTL\s/;
const regExp = new RegExp(pattern);
  readStream.on('readable', async () => {
let line;
while (null !== (line = readStream.read())) {
if (!regExp.test(line.toString())) {
totalRecordsCount++;
dataObject = soiParser(line);
const { id } = dataObject;
const XMLRequests = createLoSTRequestXML(
options,
{ mapping: event.mapping, row: dataObject }
);
console.log('Read line');
console.log(id);
try {
for (let i = 0; i < XMLRequests.length; i++) {
totalRequestsCount++;
console.log('Sending request');
const response = await sendLoSTRequest(
options,
{ data: XMLRequests[i],
url: LOST_URL }
);
console.log("got response");
const responseObj = await xml2js.parseStringPromise(response.data);
if (Object.keys(responseObj).indexOf('errors') !== -1) {
fs.writeFileSync(`${ERR_DIR}/${generateKey()}-${id}.xml`, response.data);
failedRequestsCount++;
} else {
successRequestsCount++;
console.log('Response from the Lost Server');
console.log(response.data);
}
}
} catch (err) {
console.log(err);
}
}
}
})
.on('end', () => {
console.log('file processed');
console.log(`
************************************************
Total Records Processed: ${totalRecordsCount}
Total Requests Sent: ${totalRequestsCount}
Success Requests: ${successRequestsCount}
Failed Requests: ${failedRequestsCount}
************************************************
`);
});
}
async function sendLoSTRequest (options, params) {
const { axios } = options;
const { url, data } = params;
if (url) {
return axios.post(url, data);
// eslint-disable-next-line no-else-return
} else {
console.log('URL is not found');
return null;
}
}
The code needs to flow like so: read a line synchronously; process the line and transform it into an array of two members; for every member, call the API and do the follow-up work; once the line is complete, look for the next line, all done in order.
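In other words, something like this sketch, which relies on readable streams being async iterables in Node 10+ (transformLine, apiCall and anotherApiCall are hypothetical stand-ins for the steps above):

async function processSequentially (options, params) {
  const { fs, byline } = options;
  const { key } = params;
  const readStream = byline.createStream(fs.createReadStream(key));
  readStream.setEncoding('utf8');
  // for await suspends the loop body on each line, so everything stays in order
  for await (const line of readStream) {
    const members = transformLine(line); // hypothetical: line -> array of two members
    for (const member of members) {
      const result = await apiCall(member);
      await anotherApiCall(result.data);
    }
  }
  console.log('file processed');
}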
UPDATE: Got a workaround, but the 'end' handler fires without waiting for the queued requests to finish.
async function processSOIFileLocal (options, params) {
console.log('Process SOI file');
const { ERR_DIR, fs, xml2js, LOST_URL, byline, event } = options;
const { key } = params;
const responseObject = {};
let totalRecordsCount = 0;
let totalRequestsCount = 0;
let failedRequestsCount = 0;
let successRequestsCount = 0;
let dataObject = {};
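  // Promise-chain queue: each task passed to queue(fn) starts only after the
  // previous one has settled, so lines are handled strictly in arrival order.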
const queue = (() => {
let q = Promise.resolve();
return fn => (q = q.then(fn));
})();
const readStream = byline.createStream(fs.createReadStream(key));
readStream.setEncoding('utf8');
const pattern = /^UHL\s|^UTL\s/;
const regExp = new RegExp(pattern);
readStream.on('readable', () => {
let line;
while (null !== (line = readStream.read())) {
if (!regExp.test(line.toString())) {
totalRecordsCount++;
dataObject = soiParser(line);
const { id } = dataObject;
const XMLRequests = createLoSTRequestXML(
options,
{ mapping: event.mapping, row: dataObject }
);
// eslint-disable-next-line no-loop-func
queue(async () => {
try {
for (let i = 0; i < XMLRequests.length; i++) {
console.log('Sending request');
console.log(id);
totalRequestsCount++;
const response = await sendLoSTRequest(
options,
{ data: XMLRequests[i],
url: LOST_URL }
);
console.log('got response');
const responseObj = await xml2js.parseStringPromise(response.data);
if (Object.keys(responseObj).indexOf('errors') !== -1) {
// console.log('Response have the error:');
// await handleError(options, { err: responseObj, id });
failedRequestsCount++;
fs.writeFileSync(`${ERR_DIR}/${generateKey()}-${id}.xml`, response.data);
} else {
console.log('Response from the Lost Server');
console.log(response.data);
successRequestsCount++;
}
}
} catch (err) {
console.log(err);
}
});
}
}
})
.on('end', () => {
console.log('file processed');
console.log(`
************************************************
Total Records Processed: ${totalRecordsCount}
Total Requests Sent: ${totalRequestsCount}
Success Requests: ${successRequestsCount}
Failed Requests: ${failedRequestsCount}
************************************************
`);
Object.assign(responseObject, {
failedRequestsCount,
successRequestsCount,
totalRecordsCount,
totalRequestsCount
});
});
}
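A small change should close that gap: since the queue already serializes the API work, the 'end' handler can push the summary through the same queue, so it runs only after every pending request has settled. A sketch against the code above:

.on('end', () => {
  // 'end' only means the file has been fully read; queued requests may still be running
  queue(() => {
    console.log('file processed');
    Object.assign(responseObject, {
      failedRequestsCount,
      successRequestsCount,
      totalRecordsCount,
      totalRequestsCount
    });
  });
});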
Thank You
If anyone wants a solution for processing a file sequentially, i.e., reading it line by line and executing some async call per line, it's recommended to use the built-in stream Transform. There you can create a transform function and invoke its callback when the async work finishes; the stream will not deliver the next line until then. That will help anyone who faces this issue. through2 is a small npm library that can also be used for the same purpose.
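For example, a minimal sketch with Node's built-in Transform, assuming byline emits one line per chunk and apiCall stands in for the external request: because the callback fires only after the async work settles, the stream applies backpressure and processes lines strictly in order.

const { Transform } = require('stream');

const lineProcessor = new Transform({
  transform (line, encoding, callback) {
    apiCall(line.toString())
      .then(() => callback()) // ready for the next line
      .catch(callback);       // surface the error on the stream
  }
});

byline.createStream(fs.createReadStream(key))
  .pipe(lineProcessor)
  .on('finish', () => console.log('file processed'));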