Search code examples
javascript, node.js, node-red

How to speed up getting data for javascript array


I want to get the creation dates of 20,000 files and store them in an array.
The total time to complete is 35 minutes, which is quite a long time. (Image: Processing Time)
Is there a way to build the array with a shorter processing time?

Is there a problem with my current logic, shown below, for building the array of file creation dates?
① Array declaration: var arr = [];
② I used the code below to get the file creation date:

// Node-RED function-node body: stamps the incoming message with a file's creation time.
// `fs` is read from the global context under the key 'fs'.
var fs = global.get('fs');
// Get the file's creation date/time. statSync is the synchronous variant, so
// this call blocks until the stat for msg.pathFName has completed.
msg.createdDate = fs.statSync(msg.pathFName).birthtime;
// Return the updated message as the result of this snippet.
return msg;

③ Store creation date in an array.

My Flow:
Image Flow

[
    {
        "id": "de157360.11d49",
        "type": "tab",
        "label": "フロー 1",
        "disabled": false,
        "info": ""
    },
    {
        "id": "6a8db36.f55e14c",
        "type": "inject",
        "z": "de157360.11d49",
        "name": "",
        "props": [
            {
                "p": "payload"
            },
            {
                "p": "topic",
                "vt": "str"
            }
        ],
        "repeat": "",
        "crontab": "",
        "once": false,
        "onceDelay": 0.1,
        "topic": "",
        "payload": "",
        "payloadType": "str",
        "x": 750,
        "y": 160,
        "wires": [
            [
                "4907d69b.07f738"
            ]
        ]
    },
    {
        "id": "4907d69b.07f738",
        "type": "change",
        "z": "de157360.11d49",
        "name": "",
        "rules": [
            {
                "t": "set",
                "p": "pathFiles",
                "pt": "msg",
                "to": "D:\\Data\\",
                "tot": "str"
            }
        ],
        "action": "",
        "property": "",
        "from": "",
        "to": "",
        "reg": false,
        "x": 910,
        "y": 160,
        "wires": [
            [
                "d68da128.cd6d9"
            ]
        ]
    },
    {
        "id": "d68da128.cd6d9",
        "type": "fs-ops-dir",
        "z": "de157360.11d49",
        "name": "",
        "path": "pathFiles",
        "pathType": "msg",
        "filter": "*",
        "filterType": "str",
        "dir": "files",
        "dirType": "msg",
        "x": 1080,
        "y": 160,
        "wires": [
            [
                "764296f2.688338"
            ]
        ]
    },
    {
        "id": "764296f2.688338",
        "type": "change",
        "z": "de157360.11d49",
        "name": "f=0",
        "rules": [
            {
                "t": "set",
                "p": "f",
                "pt": "msg",
                "to": "0",
                "tot": "num"
            },
            {
                "t": "set",
                "p": "arrObjFiles",
                "pt": "msg",
                "to": "[]",
                "tot": "json"
            }
        ],
        "action": "",
        "property": "",
        "from": "",
        "to": "",
        "reg": false,
        "x": 1210,
        "y": 160,
        "wires": [
            [
                "ab664988.c25da8",
                "232fdda0.b11e22"
            ]
        ]
    },
    {
        "id": "1912092c.fbab77",
        "type": "change",
        "z": "de157360.11d49",
        "name": "f++",
        "rules": [
            {
                "t": "set",
                "p": "f",
                "pt": "msg",
                "to": "$.f + 1\t",
                "tot": "jsonata"
            }
        ],
        "action": "",
        "property": "",
        "from": "",
        "to": "",
        "reg": false,
        "x": 1650,
        "y": 60,
        "wires": [
            [
                "ab664988.c25da8"
            ]
        ]
    },
    {
        "id": "ab664988.c25da8",
        "type": "switch",
        "z": "de157360.11d49",
        "name": "u<number of files",
        "property": "f",
        "propertyType": "msg",
        "rules": [
            {
                "t": "lt",
                "v": "files.length",
                "vt": "msg"
            },
            {
                "t": "else"
            }
        ],
        "checkall": "true",
        "repair": false,
        "outputs": 2,
        "x": 1410,
        "y": 160,
        "wires": [
            [
                "15154524.a6828b"
            ],
            [
                "c7625457.8ccb18"
            ]
        ]
    },
    {
        "id": "f8030294.f6334",
        "type": "function",
        "z": "de157360.11d49",
        "name": "Get file creation date time",
        "func": "var fs = global.get('fs');\n// Get file creation date time\nmsg.createdDate = fs.statSync(msg.pathFName).birthtime;\nreturn msg;",
        "outputs": 1,
        "noerr": 0,
        "initialize": "",
        "finalize": "",
        "x": 1830,
        "y": 160,
        "wires": [
            [
                "59879c16.6c7564"
            ]
        ]
    },
    {
        "id": "59879c16.6c7564",
        "type": "change",
        "z": "de157360.11d49",
        "name": "Storage file creation date time",
        "rules": [
            {
                "t": "set",
                "p": "objFiles",
                "pt": "msg",
                "to": "{}",
                "tot": "json"
            },
            {
                "t": "set",
                "p": "objFiles",
                "pt": "msg",
                "to": "$merge([$.objFiles,{'fileName': $.fileName,'createdDate': $.createdDate}])",
                "tot": "jsonata"
            },
            {
                "t": "set",
                "p": "arrObjFiles",
                "pt": "msg",
                "to": "$append(arrObjFiles, [objFiles])",
                "tot": "jsonata"
            }
        ],
        "action": "",
        "property": "",
        "from": "",
        "to": "",
        "reg": false,
        "x": 2090,
        "y": 160,
        "wires": [
            [
                "1912092c.fbab77"
            ]
        ]
    },
    {
        "id": "15154524.a6828b",
        "type": "change",
        "z": "de157360.11d49",
        "name": "",
        "rules": [
            {
                "t": "set",
                "p": "fileName",
                "pt": "msg",
                "to": "$.files[$$.f]",
                "tot": "jsonata"
            },
            {
                "t": "set",
                "p": "pathFName",
                "pt": "msg",
                "to": "pathFiles & fileName",
                "tot": "jsonata"
            }
        ],
        "action": "",
        "property": "",
        "from": "",
        "to": "",
        "reg": false,
        "x": 1620,
        "y": 160,
        "wires": [
            [
                "f8030294.f6334"
            ]
        ]
    },
    {
        "id": "c7625457.8ccb18",
        "type": "debug",
        "z": "de157360.11d49",
        "name": "END",
        "active": true,
        "tosidebar": true,
        "console": false,
        "tostatus": false,
        "complete": "arrObjFiles",
        "targetType": "msg",
        "statusVal": "",
        "statusType": "auto",
        "x": 1590,
        "y": 220,
        "wires": []
    },
    {
        "id": "232fdda0.b11e22",
        "type": "debug",
        "z": "de157360.11d49",
        "name": "START",
        "active": true,
        "tosidebar": true,
        "console": false,
        "tostatus": false,
        "complete": "files.length",
        "targetType": "msg",
        "statusVal": "",
        "statusType": "auto",
        "x": 1380,
        "y": 220,
        "wires": []
    }
]

Solution

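  • You're using fs.statSync, which is a synchronous function: every time you call it, all code execution stops until that call finishes. Look into using fs.stat (the asynchronous version), mapping over your array of file paths, and using Promise.all. Note that Promise.all needs promises: in Node.js, fs.promises.stat returns one, whereas the plain fs.stat takes a callback (wrap it with util.promisify if you want to use it with Promise.all).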

    Using the asynchronous version of the function, you can start the calls for many files at once, so the whole job finishes faster: multiple files can be processed at the same time instead of each one having to wait for the previous (possibly very slow) one to finish.

    Here's an example of what I mean, that you can run in the browser:

    const arr = ["hello", "i", "am", "here"];

    /**
     * Example of a function that returns a promise.
     * Waits `seconds` seconds, then resolves with `word` converted to upper case.
     *
     * @param {number} seconds - Delay before the promise resolves, in seconds.
     * @param {string} word - Text to convert to upper case.
     * @returns {Promise<string>} Resolves with the upper-cased `word`.
     */
    const sleepAndUppercase = (seconds, word) => {
      // setTimeout takes milliseconds.
      const delayMs = seconds * 1000;
      return new Promise((resolve) => {
        setTimeout(() => {
          resolve(word.toUpperCase());
        }, delayMs);
      });
    };
    
    // Promise.all wraps the array of promises into a single promise.
    Promise.all(
      // Convert the array of words into an array of promises, one per call to
      // sleepAndUppercase; the array index doubles as the delay in seconds.
      arr.map((word, i) => sleepAndUppercase(i, word))
    )
      // Runs once all the promises are done; listOfWords is the array of the
      // upper-cased words, in the same order as arr.
      .then((listOfWords) => {
        // we can log the results
        console.log(listOfWords);
        // we can create variables based off of those results
        const sentence = listOfWords.join(" ");
        console.log(sentence);
      })
      // Promise.all rejects as soon as any input promise rejects; report the
      // error instead of leaving the rejection unhandled.
      .catch((err) => {
        console.error(err);
      });

    // Logged before any of the results above: at this point the promises have
    // only been started, not finished.
    console.log("Promises started");

    Now to make it closer to your question:

    In this example, each call to fs.stat takes 5 seconds to resolve, but all 5 of the results come out together after about 5 seconds, which is much faster than 5 * 5 = 25 seconds, since all the calls start at once.

    // Stand-in for the real `fs` module so that this snippet can run on StackOverflow.
    // Do not copy this object into your code.
    const fs = {
      /**
       * Fake stat call: resolves after 5 seconds with a made-up creation time.
       *
       * @param {string} pathFName - File path; not used by this fake.
       * @returns {Promise<{birthtime: number}>} Resolves with a random past
       *   timestamp in milliseconds (up to 100000000 ms before the call).
       */
      stat(pathFName) {
        const FAKE_DELAY_MS = 5000;
        const MAX_AGE_MS = 100000000;
        const now = Date.now();
        const birthtime = now - Math.floor(Math.random() * MAX_AGE_MS);
        return new Promise((resolve) => {
          setTimeout(() => {
            resolve({ birthtime });
          }, FAKE_DELAY_MS);
        });
      },
    };
    
    // Start the on-page timer: every 20 ms, show the elapsed seconds in #ptimer.
    // The elapsed time is read from the clock instead of adding 0.02 per tick:
    // repeated floating-point addition accumulates rounding error (the display
    // can show long fractions such as 0.12000000000000001s), and interval
    // callbacks are not guaranteed to fire exactly every 20 ms.
    let timer = 0;
    const timerStartMs = Date.now();
    const timerInterval = setInterval(() => {
      timer = (Date.now() - timerStartMs) / 1000;
      // Always show two decimals so the text does not jump in width.
      document.querySelector("#ptimer").innerText = `${timer.toFixed(2)}s`;
    }, 20);
    
    // Example messages; each one carries the path of a file to look up.
    const msgs = [
      { pathFName: "foo.txt" },
      { pathFName: "bar.png" },
      { pathFName: "baz.html" },
      { pathFName: "idk.jpg" },
      { pathFName: "pepe.sql" },
    ];

    // Start every fs.stat call at once and wait for all of them together,
    // instead of waiting for each file before asking about the next one.
    (async () => {
      try {
        const msgsWithTimeStamps = await Promise.all(
          msgs.map(async (msg) => {
            const { birthtime } = await fs.stat(msg.pathFName);
            // Copy the message and add its creation time; the original
            // message object is left untouched.
            return { ...msg, birthtime };
          })
        );
        console.log(msgsWithTimeStamps);
      } catch (err) {
        // Promise.all rejects as soon as any single stat call fails; report
        // the error rather than leaving an unhandled rejection.
        console.error(err);
      } finally {
        // stop timer, even when one of the lookups failed
        clearInterval(timerInterval);
      }
    })();
    <p id="ptimer">0s</p>