Search code examples
javascript node.js heroku axios cheerio

Axios.get too slow to return data


I have an axios get request which takes too long to resolve. This is for a site hosted on Heroku, which has a request timeout set at 30 seconds. The following code returns the request after about 50 seconds (which is surprisingly long, as there are only 21 urls to loop through in playerLink). Therefore, the request is never resolved on the live site.

Here is the Promise code:

// Port the Express server listens on.
const PORT = 8000

const axios = require('axios')
const cheerio = require('cheerio')
const express = require('express')
const cors = require('cors')

// Express application; the cors() middleware is registered for every route.
const app = express()
app.use(cors())


app.listen(PORT , () => console.log(`server running on PORT ${PORT}`))

// Roster page whose table cells are scraped for player links.
const players = 'https://www.trinethunder.com/sports/sball/2021-22/teams/trine?view=roster'
// Prefix prepended to every href taken from the roster page.
const playerStats = 'https://www.trinethunder.com'

// Declared at module level, so the collected links persist across requests
// to /players for as long as the process runs.
const playerLink = []

// GET /players: gathers player links from the roster page, then requests each
// linked page and responds with the collected stat arrays as JSON.
app.get('/players', (req, res) => {
    // Requests the roster page and appends every link that is not already in
    // the module-level playerLink array. The returned promise resolves with no
    // value once the links have been collected.
    // If the request (or the parsing) fails, the error is only logged: reject()
    // is never called, so the promise stays pending and nothing further runs.
    function getPlayers() {
      return new Promise((resolve, reject) => {
        axios(players)
          .then((response) => {
            const html = response.data;
            const $ = cheerio.load(html);
            
            // NOTE(review): the raw HTML string is also passed as the second
            // argument of $(); check the cheerio docs for how a string in that
            // position is treated — confirm it is not parsed again per query.
            $("td.text.pinned-col > a", html).each(function () {
              var link = $(this).attr("href");
              //if link not yet in array, push to array
              if (playerLink.indexOf(playerStats + link) === -1) {
                playerLink.push(playerStats + link);
              }
            });
            resolve()
          })
          .catch((err) => {
            console.log(err);
          });
      });
    }
    // Schedules the scraping work 400 ms in the future and returns immediately
    // (undefined). The timer callback requests the URLs in playerLink one at a
    // time and, after the loop, sends statsArray as the JSON response.
    function getPlayerStats() {
      setTimeout(async () => {
        const statsArray = []
        for (let i = 0; i < playerLink.length; i++) {
          // The await suspends the loop until this request's promise resolves,
          // so request i+1 is not started before request i has been processed.
          await new Promise((resolve, reject) => {
            axios.get
              (playerLink[i])
              .then((response) => {
                const html = response.data;
                const $ = cheerio.load(html);
                const statName = [];
                const statDesc = [];
                const statNum = [];
  
                // Collect the text of every match into the three arrays above.
                $("h2 > span:nth-child(1)", html).each(function () {
                  var name = $(this).text();
                  statName.push(name);
                });
                $(".stat-title", html).each(function () {
                  var stat1 = $(this).text();
                  statDesc.push(stat1);
                });
                $(".stat-value", html).each(function () {
                  var stat2 = $(this).text();
                  statNum.push(stat2);
                });
                //Conditional is here because sometimes statsArray
                //gets filled multiple times
                // Each page appends three arrays, so the cap of 63 entries is
                // reached after 21 pages; later pages are fetched but dropped.
                if (statsArray.length < 63) {
                  statsArray.push(statName, statDesc, statNum);
                }
                resolve();
              })
              // The error is logged only; resolve() is not called, so the
              // awaited promise stays pending, the loop stops at this URL and
              // res.json() below is never reached.
              .catch((err) => console.log(err));
          });
        }
        res.json(statsArray)
      }, 400);
    }
    
    // getPlayerStats only starts a timer, so this chain settles before any
    // player page is requested; errors raised inside the timer callback do not
    // reach the .catch() below.
    getPlayers()
      .then(getPlayerStats)
      .catch((err) => console.log(err));
  });

Simplified Fetch statement for /players:

// Request the scraped stats from the local server and print the parsed JSON;
// any network or parsing failure is printed instead.
const printToConsole = (value) => console.log(value);

fetch('http://localhost:8000/players')
    .then((response) => response.json())
    .then(printToConsole)
    .catch(printToConsole);

Please let me know if you see anything that may be slowing down the execution of the request.


Solution

  • I cleaned up the code, removed the setTimeout(), restructured it for maximum parallelization, instrumented it, and made it runnable stand-alone. The log it produces is below: getPlayers() takes 2413ms and the synchronous cheerio processing of the individual player requests takes a total of 6087ms. From start to finish, the whole thing takes 9415ms on my system.

    This is significantly faster than what you report. The biggest structural change I made is that all the individual getPlayerStats requests are made in parallel rather than serially, which (if the target server can handle it) shortens the total time spent waiting on network requests for player stats. I also removed the setTimeout(), as that seemed like a hack for some other problem; once the code is structured properly for asynchronous handling, it should not be necessary.

    Here's the detailed log if you want to see where all the detailed time is spent. You can run the code below on your own system to see what you get there:

    000000:  begin all
    000006:  begin getPlayers()
    002419:  end getPlayers()
    002419:  begin getPlayerStats
    002420:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/makinzeromingersy0k
    002423:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/emersynhaneyjnrb
    002424:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/amandapratheruluw
    002424:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/adrienneroseybff7
    002425:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/emmabeyeri6zz
    002426:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/aprilsellersi95s
    002427:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/annakoeppl38q8
    002427:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/annagilli8rl
    002428:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/angelenaperry2scn
    002429:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/laurenclausenfb4j
    002430:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/emilywheaton1jym
    002430:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/kaylyncoahranhp6r
    002431:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/mercededaughertyiswy
    002432:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/taylormurdockgeho
    002432:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/lexiclark77gr
    002433:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/ainsleyphillipsmfe9
    002434:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/ellietrinexhe2
    002434:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/ashleyswartouta714
    002435:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/gisellerileybdb8
    002436:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/elizabethkoch5umu
    002436:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/scarlettelliott0bvt
    003251:  after get https://www.trinethunder.com/sports/sball/2021-22/players/kaylyncoahranhp6r
    003596:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/kaylyncoahranhp6r
    003599:  after get https://www.trinethunder.com/sports/sball/2021-22/players/makinzeromingersy0k
    003902:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/makinzeromingersy0k
    003905:  after get https://www.trinethunder.com/sports/sball/2021-22/players/emersynhaneyjnrb
    004200:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/emersynhaneyjnrb
    004203:  after get https://www.trinethunder.com/sports/sball/2021-22/players/amandapratheruluw
    004489:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/amandapratheruluw
    004492:  after get https://www.trinethunder.com/sports/sball/2021-22/players/emmabeyeri6zz
    004771:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/emmabeyeri6zz
    004773:  after get https://www.trinethunder.com/sports/sball/2021-22/players/aprilsellersi95s
    005060:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/aprilsellersi95s
    005063:  after get https://www.trinethunder.com/sports/sball/2021-22/players/elizabethkoch5umu
    005345:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/elizabethkoch5umu
    005348:  after get https://www.trinethunder.com/sports/sball/2021-22/players/emilywheaton1jym
    005638:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/emilywheaton1jym
    005643:  after get https://www.trinethunder.com/sports/sball/2021-22/players/ashleyswartouta714
    005943:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/ashleyswartouta714
    005951:  after get https://www.trinethunder.com/sports/sball/2021-22/players/ainsleyphillipsmfe9
    006243:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/ainsleyphillipsmfe9
    006245:  after get https://www.trinethunder.com/sports/sball/2021-22/players/adrienneroseybff7
    006541:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/adrienneroseybff7
    006545:  after get https://www.trinethunder.com/sports/sball/2021-22/players/annagilli8rl
    006821:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/annagilli8rl
    006824:  after get https://www.trinethunder.com/sports/sball/2021-22/players/mercededaughertyiswy
    007111:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/mercededaughertyiswy
    007118:  after get https://www.trinethunder.com/sports/sball/2021-22/players/lexiclark77gr
    007402:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/lexiclark77gr
    007411:  after get https://www.trinethunder.com/sports/sball/2021-22/players/angelenaperry2scn
    007681:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/angelenaperry2scn
    007685:  after get https://www.trinethunder.com/sports/sball/2021-22/players/laurenclausenfb4j
    007974:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/laurenclausenfb4j
    007976:  after get https://www.trinethunder.com/sports/sball/2021-22/players/scarlettelliott0bvt
    008265:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/scarlettelliott0bvt
    008267:  after get https://www.trinethunder.com/sports/sball/2021-22/players/ellietrinexhe2
    008553:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/ellietrinexhe2
    008555:  after get https://www.trinethunder.com/sports/sball/2021-22/players/gisellerileybdb8
    008838:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/gisellerileybdb8
    008840:  after get https://www.trinethunder.com/sports/sball/2021-22/players/annakoeppl38q8
    009129:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/annakoeppl38q8
    009131:  after get https://www.trinethunder.com/sports/sball/2021-22/players/taylormurdockgeho
    009415:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/taylormurdockgeho
    009415:  end all
     ... data here
    getPlayers() took 2413ms
    cheerio processing took 6087ms
    

    And, here's the stand-alone code that anyone can run:

    const axios = require('axios');
    const cheerio = require('cheerio');
    
    // Roster page that lists the team's players.
    const players = 'https://www.trinethunder.com/sports/sball/2021-22/teams/trine?view=roster';
    // Site origin (scheme + host), prepended to the hrefs scraped from the roster page.
    const playerStats = new URL(players).origin;
    
    
    
    /**
     * Left-pad a value with "0" characters up to a minimum width.
     *
     * Uses String.prototype.padStart, so the width is not limited by the length
     * of a pre-built string of zeroes. Values already at least `padLen`
     * characters long are returned unchanged (never truncated).
     *
     * @param {number|string} num - Value to format; converted with String().
     * @param {number} padLen - Minimum length of the returned string.
     * @returns {string} The zero-padded representation of `num`.
     */
    function zeroPad(num, padLen) {
        return String(num).padStart(padLen, "0");
    }
    
    // Reference instant taken when the script starts; log() prints offsets from it.
    const base = Date.now();

    /**
     * Print a message prefixed with the milliseconds elapsed since `base`,
     * zero-padded to six digits and followed by ": ".
     *
     * @param {...*} args - Values forwarded to console.log after the prefix.
     */
    function log(...args) {
        const elapsed = zeroPad(Date.now() - base, 6);
        console.log(`${elapsed}: `, ...args);
    }
    
    // Accumulated wall-clock milliseconds spent in getPlayers() and in the
    // synchronous cheerio work; updated by run().
    let getPlayersT = 0;
    let cheerioT = 0;

    /**
     * Scrape the roster page for player links, fetch every player page in
     * parallel and collect the stat arrays.
     *
     * @returns {Promise<string[][]>} Flat list holding three consecutive arrays
     *   per player (statName, statDesc, statNum), in the order the links appear
     *   on the roster page.
     * @throws Re-throws any request or parsing error after logging it, so the
     *   caller's rejection handler actually runs instead of receiving undefined.
     */
    async function run() {

        // Fetch the roster page and return the unique player URLs in page order.
        async function getPlayers() {
            log("begin getPlayers()");
            const startT = Date.now();
            const response = await axios(players);
            const $ = cheerio.load(response.data);

            // A Set removes duplicates while keeping first-seen order.
            const links = new Set();
            // The document is already loaded into $, so no context argument is
            // passed: handing the HTML string to $() again makes cheerio parse
            // the whole page a second time.
            $("td.text.pinned-col > a").each(function() {
                const href = $(this).attr("href");
                // Skip anchors without an href rather than building "...undefined".
                if (href) {
                    links.add(playerStats + href);
                }
            });
            log("end getPlayers()");
            getPlayersT += Date.now() - startT;
            return [...links];
        }

        // Fetch all player pages concurrently and extract three arrays per page.
        async function getPlayerStats(playerLink) {
            log("begin getPlayerStats");
            const perPlayer = await Promise.all(playerLink.map(async (link) => {
                log(`begin get ${link}`);
                const response = await axios.get(link);
                log(`after get ${link}`);
                const startT = Date.now();
                // Parse once; every query below reuses this parsed document.
                const $ = cheerio.load(response.data);
                const textsOf = (selector) =>
                    $(selector).map((_, el) => $(el).text()).get();

                const stats = [
                    textsOf("h2 > span:nth-child(1)"),
                    textsOf(".stat-title"),
                    textsOf(".stat-value"),
                ];
                cheerioT += Date.now() - startT;
                log(`after cheerio parse ${link}`);
                return stats;
            }));
            // Promise.all keeps input order, so the output no longer depends on
            // which response arrives first. The former "length < 63" guard is
            // gone: the links are de-duplicated and each page contributes exactly
            // once, so it could only drop players arbitrarily on larger rosters.
            return perPlayer.flat();
        }

        try {
            log("begin all");
            const playerLink = await getPlayers();
            const statsArray = await getPlayerStats(playerLink);
            log("end all");
            return statsArray;
        } catch (e) {
            console.log(e);
            // Propagate the failure instead of resolving with undefined.
            throw e;
        }
    }
    
    // Print the scraped data followed by the two accumulated timings.
    const printReport = (result) => {
        console.log(result);
        console.log(`getPlayers() took ${getPlayersT}ms`);
        console.log(`cheerio processing took ${cheerioT}ms`);
    };

    // Start the scrape; a rejection (or an error thrown while printing) is
    // reported with a fixed message.
    run()
        .then(printReport)
        .catch(() => {
            console.log("error");
        });