I'm having some trouble with async/await in Node.js. I'm using this `wait` function to stand in for the actual function, which is a tesseract.js call, but the `wait` function produces the same behaviour.
This is meant to take a list of PDF filenames and loop through them ONE AT A TIME, waiting for tesseract.js to finish its OCR on each file before moving on to the next one.
I've made a SUPER messy recursive function to OCR the pages in order, but it produces the same issue as this `wait` function does, and this is easier to read than my mess:
// Synchronously list the entries of the ./Pdfs directory (expects Node's `fs` module to already be in scope).
const PdfList = fs.readdirSync("./Pdfs");
/**
 * Logs every entry of `array` together with its index and calls `wait(30000)`
 * for it, driving the iteration with `Array.prototype.reduce` and an async
 * callback. Logs "testReduce" once the promise returned by `reduce` settles.
 *
 * @param {Array} [array=[]] - Items to iterate (the caller in this file passes the ./Pdfs listing).
 * @returns {Promise<void>}
 */
async function testReduce(array = []) {
// reduce() is called without an initial value below, so the first element is
// consumed as the starting accumulator and the callback starts at index 1.
// The "" placeholder lets the real first entry reach the callback.
array = [""].concat(array);
// `a` is the accumulator (the value returned by the previous callback call);
// it is never read or awaited inside the callback.
await array.reduce(async (a, pdf, pdfIndex) => {
// Undo the offset introduced by the placeholder element.
pdfIndex = pdfIndex - 1;
//testing Stuff
console.log("current PDF: " + pdf);
console.log("current Index: " + pdfIndex);
// console.log(files);
await wait(30000);
});
console.log("testReduce");
}
/**
 * Demo entry point: runs `testReduce` over `PdfList`, then logs "testBoth"
 * once the promise returned by `testReduce` has settled.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const pending = testReduce(PdfList);
  await pending;
  console.log("testBoth");
}
// Start the demo. The promise returned by run() is neither awaited nor given a rejection handler.
run();
/**
 * Logs `ms` immediately, then resolves after a `setTimeout` of `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
async function wait(ms) {
  console.log(ms);
  await new Promise((done) => {
    setTimeout(done, ms);
  });
}
I get the following output immediately:
current PDF: PROBATE - 180000529.pdf
current Index: 0
30000
current PDF: PROBATE - 180000529pdf
current Index: 1
30000
current PDF: PROBATE - 190000452.pdf
current Index: 2
30000
current PDF: PROBATE - 190000452pdf
current Index: 3
30000
current PDF: PROBATE - 200000501.pdf
current Index: 4
30000
current PDF: PROBATE - 200000501pdf
current Index: 5
30000
testReduce
testBoth
Then, after that, it actually waits; this would be where the OCR starts to happen.
Well, I've tried lots of ways, from `array.forEach` to `array.map`; the `reduce` is my latest attempt, but this is an async thing I'm getting wrong, so I don't really know what's up here.
I'm expecting the `await` to halt everything until it's done.
When an `await` is encountered, the current `async` function's execution context is saved for later, and it returns. For `reduce` (and other loop methods) this means that the `reduce` callback is called again in quick succession -- synchronously. The promise that `reduce` returns is the promise of the last callback execution. It has nothing to do with the promises returned by the previous iterations -- those are lost.
This means that "testReduce" is output after one delay, not the summation of all delays.
If you want the second iteration to only start when the first one has finished its API request (mocked by the `wait`), and so on for the later iterations, then don't use a callback, but have the loop do its job in the context of the bigger function:
// Hard-coded sample file names used as the loop input.
const PdfList = ["a.pdf", "b.pdf", "c.pdf", "d.pdf"];
/**
 * Processes the given items strictly one after another: logs the item and its
 * index, then awaits `wait(2000)` before moving on to the next item. Because
 * the `await` sits directly in this function's own loop (not in a callback),
 * each iteration only starts once the previous delay has finished.
 *
 * @param {Array} [array=[]] - Items to process in order (PDF file names in this demo).
 * @returns {Promise<void>} Settles after every item has been processed and
 *   "testReduce" has been logged.
 */
async function testReduce(array = []) {
  // entries() yields [index, value] pairs, so the index is destructured first.
  for (const [pdfIndex, pdf] of array.entries()) {
    //testing Stuff
    console.log("current PDF: " + pdf);
    console.log("current Index: " + pdfIndex);
    await wait(2000); // I used a smaller delay here!
  }
  console.log("testReduce");
}
/**
 * Demo driver: waits for `testReduce(PdfList)` to finish, then logs "testBoth".
 *
 * @returns {Promise<void>}
 */
async function run() {
  const allProcessed = testReduce(PdfList);
  await allProcessed;
  console.log("testBoth");
}
// Start the demo and report any failure explicitly instead of leaving the
// returned promise floating without a rejection handler.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
/**
 * Mock for a slow asynchronous operation: logs the requested delay right away
 * and resolves once a `setTimeout` of `ms` milliseconds fires.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
async function wait(ms) {
  console.log(ms);
  const timer = new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
  return timer;
}