Search code examples
javascriptarraysoptimizationforeachcomputation

How to optimize the computation speed in array.forEach in node.js


I have a piece of code simply loops through two arrays and for each element of the first array, it finds the relevant element in the second array and changes only the first occurrence and delete the remaining ones.

 /**
     * The aggregation data structure:
     * "_id": {
     * "geometry": geometry,
     * "dups": [
     *    "5b3b25b4e54029249c459bfc", keep only the fisrt element in allDocs
     *    "5b3b25b4e54029249c459e65", delete it from allDocs
     *    "5b3b25b4e54029249c459d7d"   delete it from allDocs
     *   ],
     * "dupsProp": [  ], array of all properties of duplicatePoints
     * "count": 3
     */
var aggregationRes =[46,000 objects]
var allDocs =[345,000 objects]
aggregationRes.forEach(function (resElem, counter) {
        console.log(counter + "/" + aggregationRes.length)
        //Delete objects in allDocs based on dups array except the first one
        var foundIndex = allDocs.findIndex(x => x._id.toString() == resElem.dups[0]);
                //assign the mergedProperties
        allDocs[foundIndex].properties = resElem.dupsProp;
        //delete the remaining ids in Docs from dups array 
        resElem.dups.forEach(function (dupElem, index) {
            var tmpFoundIndex = allDocs.findIndex(x => x._id.toString() == resElem.dups[index + 1]);
            if (tmpFoundIndex !== -1) {
                allDocs.splice(tmpFoundIndex, 1)
            }
        })
    })

This script runs almost for 4 hours. As you see, the calculations are really simple but since the allDocs array is big, it takes quite a long time. It would be great if someone gives me a hint on how to decrease the computation time. Thanks in advance


Solution

  • Taking the ideas from Bergi, we index the documents by id to avoid having to find the indexes which is expensive:

    var allDocs =[345,000 objects]
    var aggregationRes =[46,000 objects]
    var allDocsIndexed = {};
    
    allDocs.forEach(function(doc){
        allDocsIndexed[doc._id.toString()] = doc;
    });
    
    aggregationRes.forEach(function (resElem, counter) {
        allDocsIndexed[resElem.dups[0]].properties = resElem.dupsProp;
        for (var i = 1; i < resElem.dupsProp.length; i++) {
            delete allDocsIndexed[resElem.dupsProp[i]];
        }
    });
    
    var allUndeletedDocs = allDocs.filter(doc => allDocsIndexed.hasOwnProperty(doc_id.toString()));
    

    Note that for javascript, this is an efficient solution but with more details provided, a better one might exist using mongodb features.