Search code examples
node.js puppeteer

How to block specific JavaScript script requests with Puppeteer


I made a script that selects the size of the sneakers and adds them to the cart, but I would like to know how I can block a specific JavaScript file. I want to improve the page loading speed. My script is currently like this:

const puppeteer = require("puppeteer");
const fs = require("fs").promises;
/**
 * Opens the product page in Chromium, picks a shoe size and adds the item
 * to the cart. Requests for resource types the script does not need are
 * aborted to speed up page loading.
 */
(async () => {
  try {
    console.log("Started!");
    const browser = await puppeteer.launch({
      executablePath: "/usr/bin/chromium",
      headless: false,
    });
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Interception must be enabled before the "request" handler can
    // abort or continue requests.
    await page.setRequestInterception(true);
    page.on("request", (req) => {
      if (
        req.resourceType() === "image" ||
        req.resourceType() === "stylesheet" ||
        req.resourceType() === "font" ||
        req.resourceType() === "texttrack" ||
        req.resourceType() === "imageset" ||
        req.resourceType() === "bacon" ||
        req.resourceType() === "csp_report" ||
        req.resourceType() === "object"
      ) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Load cookies from disk and apply them to the page before navigating.
    const cookiesString = await fs.readFile("./cookies.json");
    const cookies = JSON.parse(cookiesString);
    await page.setCookie(...cookies);

    // timeout: 0 disables the navigation timeout.
    await page.goto(
      "https://www.nike.com.br/chuteira-nike-premier-2-sala-unissex-153-169-171-309321",
      { timeout: 0 }
    );

    // Wait (without a timeout) until the size label is visible; the wait
    // resolves to the element handle, so no second query is needed.
    const tamanho = await page.waitForXPath(
      '//label[@for="tamanho__idM40F395"]',
      {
        visible: true,
        timeout: 0,
      }
    );
    await tamanho.click();

    await page.waitForSelector("button#btn-comprar");
    await page.click("button#btn-comprar");
    console.log("Added to cart!");
  } catch (err) {
    // Include the underlying error so failures can actually be diagnosed.
    console.error("Element is not on the page or unexpected error!", err);
  }
})();

How can I prevent a certain JavaScript file from being loaded? This is the file I want to block:

<script type="text/javascript" src="https://cdn-prod.securiti.ai/consent/sdk-stub.js" defer=""></script>

Solution

  • The req object has a .url() method whose return value you can compare against the URL you want to block.

    In your code,

    if (
      // ...your existing req.resourceType() checks, each followed by ||
      req.url() === "https://cdn-prod.securiti.ai/consent/sdk-stub.js"
    ) {
      req.abort();
    }
    // ...the else branch with req.continue() stays as it was
    

    Here's a minimal example:

    const puppeteer = require("puppeteer");
    
    // Kept outside the async function so the final cleanup step can reach it.
    let browser;

    /**
     * Loads a tiny page that pulls React from a CDN, first with the script
     * allowed and then again after request blocking has been switched on.
     */
    const run = async () => {
      const html = `
        <script src="https://unpkg.com/react@17/umd/react.development.js"></script>
      `;
      browser = await puppeteer.launch();
      const openPages = await browser.pages();
      const page = openPages[0];

      // first pass: nothing is blocked, so the React global exists
      await page.setContent(html);
      const reactLoaded = await page.evaluate(() => !!React);
      console.log(reactLoaded); // => true

      // second pass: turn on interception and describe what to drop
      await page.setRequestInterception(true);
      const blockedTypes = new Set([
        "image",
        "stylesheet",
        "font",
        "object",
        "texttrack",
        "imageset",
        "beacon",
        "csp_report",
      ]);
      const blockedUrls = new Set([
        "https://unpkg.com/react@17/umd/react.development.js",
      ]);
      page.on("request", req => {
        // abort on a blocked resource type or a blocked URL, otherwise let it through
        if (blockedTypes.has(req.resourceType()) || blockedUrls.has(req.url())) {
          req.abort();
          return;
        }
        req.continue();
      });

      // with the script blocked, React is undefined and the evaluate throws
      await page.reload();
      await page.setContent(html);
      const reactLoadedAfterBlock = await page.evaluate(() => !!React);
      console.log(reactLoadedAfterBlock);
    };

    run()
      .catch(err => console.error(err))
      .finally(() => browser?.close());
    

    I think you meant "beacon" rather than "bacon".