Please check if my understanding about the following for loop
is correct.
// Call sample_function once for each i in 0..999, passing a no-op callback.
// The update clause must advance the counter (`i++`); a bare `i` leaves it at 0,
// so the condition `i < 1000` stays true and the loop never terminates.
for (let i = 0; i < 1000; i++) {
  sample_function(i, function (result) {});
}
The moment the for loop is invoked, 1000 sample_function events will be queued in the event loop. About 5 seconds later a user sends an HTTP request, which is queued behind those "1000 events".
Usually this would not be a problem because the loop is asynchronous.
But let's say that sample_function is a CPU-intensive function. The "1000 events" are then completed consecutively, each taking about 1 second.
As a result, the for loop will block the event loop for about 1000 seconds.
Is there a way to solve this problem? For example, would it be possible to let the thread take a "break" every 10 iterations and allow newly queued events to be handled in between? If so, how would I do it?
There is a technique called partitioning, which you can read about in the Node.js documentation. But as the documentation states:
If you need to do something more complex, partitioning is not a good option. This is because partitioning uses only the Event Loop, and you won't benefit from multiple cores almost certainly available on your machine.
So you can also use another technique called offloading, e.g. using worker threads or child processes. These have downsides of their own, such as having to serialize and deserialize any objects that you wish to share between the event loop (the current thread) and a worker thread or child process.
Following is an example of partitioning that I came up with which is in the context of an express application.
const express = require('express');
const crypto = require('crypto');
const randomstring = require('randomstring');
const app = express();
const port = 80;
// Lightweight route: replies 'ok' immediately. Useful for observing how long
// the event loop is held up while /block is running.
// The handler is deliberately not `async`: it awaits nothing, and a plain
// function lets a synchronous throw propagate to the caller instead of turning
// into a promise rejection that nothing handles.
app.get('/', (req, res) => {
  res.send('ok');
});
// Runs block() ten times, one after another, and replies with the collected
// results. Each call is awaited before the next one starts, so other pending
// requests get a chance to be served between consecutive calls.
app.get('/block', async (req, res, next) => {
  try {
    const result = [];
    for (let i = 0; i < 10; ++i) {
      result.push(await block());
    }
    res.send({ result });
  } catch (err) {
    // Express versions before 5 do not handle a rejected promise returned by
    // an async handler, so hand the error to the error middleware explicitly;
    // otherwise the request would never receive a response.
    next(err);
  }
});
// Start accepting connections on `port`; once the server is listening, log the
// port number and a clickable local URL.
app.listen(port, function onListening() {
  console.log(`Listening on port ${port}`);
  console.log(`http://localhost:${port}`);
});
/**
 * Simulates a long-running, CPU-bound task (around 5 seconds, depending on
 * the processor) by feeding one million random strings into a SHA-256 hash.
 *
 * The work is deferred with setImmediate, following the partitioning technique
 * (setImmediate/setTimeout) described in
 * https://nodejs.org/en/docs/guides/dont-block-the-event-loop/#partitioning
 * so the event loop gets a breathing period between consecutive calls. The
 * hashing loop itself still runs to completion without yielding once started.
 *
 * The Promise wrapper only exists to hand the result back to the caller
 * asynchronously; it is not part of the partitioning technique.
 *
 * @returns {Promise} resolves with the crypto Hash object after all updates
 *   (the Hash itself, not a digest); rejects if hashing throws
 */
const block = () => {
  return new Promise((resolve, reject) => {
    setImmediate(() => {
      // An exception thrown inside a setImmediate callback is not seen by the
      // enclosing Promise executor, so forward it to reject() explicitly.
      // Otherwise it would surface as an uncaught exception and the promise
      // would never settle.
      try {
        const hash = crypto.createHash("sha256");
        const numberOfHashUpdates = 1e6;
        for (let iter = 0; iter < numberOfHashUpdates; iter++) {
          hash.update(randomstring.generate());
        }
        resolve(hash);
      } catch (err) {
        reject(err);
      }
    });
  });
};
There are two endpoints, / and /block. If you hit /block and then hit /, the / endpoint takes around 5 seconds to respond: its request is served during the breathing space (the thing you call a "break") between two calls to block.
If setImmediate were not used, the / endpoint would respond only after approximately 10 * 5 seconds (10 being the number of times the block function is called in the for loop).
Also you can do partitioning using a recursive approach like this:
/**
 * Calls syncBlock once per entry of `items`, handling at most `chunk` entries
 * per turn of the event loop. The first batch runs synchronously inside this
 * call; every later batch is scheduled with setImmediate so other pending
 * events can be served in between. Nothing is returned and completion is not
 * signalled.
 *
 * @param items array we need to process (null/undefined is treated as empty)
 * @param chunk a number indicating number of items to be processed on each
 *   iteration of event loop before the breathing space; must be greater than 0
 * @throws {RangeError} if chunk is not a number greater than 0
 */
function processItems(items, chunk) {
  // With a zero, negative, NaN or missing chunk no item would ever be consumed
  // and the batch would reschedule itself forever, so reject it up front.
  if (!(chunk > 0)) {
    throw new RangeError('chunk must be a number greater than 0');
  }
  let index = 0;
  const hasMore = () => index < (items?.length ?? 0);
  // Named runBatch rather than `process` to avoid shadowing Node's global.
  const runBatch = () => {
    let remaining = chunk;
    while (remaining > 0 && hasMore()) {
      --remaining;
      // The current item is handed over; a syncBlock that declares no
      // parameters simply ignores it.
      syncBlock(items[index]);
      ++index;
    }
    if (hasMore()) {
      // The key is to schedule the next batch via setImmediate instead of
      // calling runBatch directly, which would keep the event loop blocked.
      setImmediate(runBatch);
    }
  };
  runBatch();
}
And if you need to get back the data processed you can promisify it like this:
/**
 * Calls syncBlock once per entry of `items`, handling at most `chunk` entries
 * per turn of the event loop, and collects the returned values in order.
 * The first batch runs synchronously inside this call; every later batch is
 * scheduled with setImmediate so other pending events can be served in between.
 *
 * @param items array we need to process (null/undefined is treated as empty)
 * @param chunk a number indicating number of items to be processed on each
 *   iteration of event loop before the breathing space; must be greater than 0
 * @returns {Promise<Array>} resolves with the syncBlock results once every item
 *   has been processed; rejects with a RangeError for an invalid chunk, or
 *   with whatever syncBlock throws
 */
function processItems(items, chunk) {
  return new Promise((resolve, reject) => {
    // With a zero, negative, NaN or missing chunk no item would ever be
    // consumed and the batch would reschedule itself forever.
    if (!(chunk > 0)) {
      reject(new RangeError('chunk must be a number greater than 0'));
      return;
    }
    const result = [];
    let index = 0;
    const hasMore = () => index < (items?.length ?? 0);
    // Named runBatch rather than `process` to avoid shadowing Node's global.
    const runBatch = () => {
      try {
        let remaining = chunk;
        while (remaining > 0 && hasMore()) {
          --remaining;
          // The current item is handed over; a syncBlock that declares no
          // parameters simply ignores it.
          result.push(syncBlock(items[index]));
          ++index;
        }
      } catch (err) {
        // Batches after the first run inside a setImmediate callback, where a
        // throw would bypass the Promise executor; reject explicitly so the
        // caller is not left with a promise that never settles.
        reject(err);
        return;
      }
      if (hasMore()) {
        setImmediate(runBatch);
      } else {
        resolve(result);
      }
    };
    runBatch();
  });
}
And you can test it by adding this route handler to the other route handlers provided above:
// Demo route for processItems: builds the array [0, 1, ..., 9], processes it
// one item per event-loop turn, and replies with the collected results.
app.get('/block2', async (req, res) => {
  const arr = Array.from({ length: 10 }, (_, index) => index);
  const result = await processItems(arr, 1);
  res.send({ result });
});