Search code examples
javascriptgoogle-chrome-extension

Download a webpage completely (Chrome Extension Manifest v3)


The Problem :::

  • I want to:

    1. Save a webpage completely, the way Ctrl+S does in Google Chrome, using my Chrome extension.
    2. Or, if there is a library for it, convert an .MHTML file to a .zip that contains the webpage's files in a folder and the webpage itself as .html (like what you get when saving the webpage completely by hand).
  • I used old code from this question: "Can we download a webpage completely with chrome.downloads.download? (Google Chrome Extension)"

  • But I ran into a problem: URL.createObjectURL() is undefined in the service worker since the Manifest V3 update.

This is the answer code from the link above:

// Open the page in a new tab and save it once that tab reports status 'complete'
chrome.tabs.create({ url: 'http://example.com' }, (tab) => {
    const onTabUpdated = (tabId, changeInfo) => {
        // Skip updates for other tabs and for any status other than 'complete'.
        if (tabId != tab.id || changeInfo.status != 'complete') {
            return;
        }
        // Unregister this handler before saving.
        chrome.tabs.onUpdated.removeListener(onTabUpdated);
        savePage(tabId);
    };
    chrome.tabs.onUpdated.addListener(onTabUpdated);
});

/**
 * Captures a tab as MHTML and passes the capture to chrome.downloads.download
 * through an object URL, using the fixed filename 'whatever.mhtml'.
 *
 * @param {number} tabId - Id of the tab to capture.
 */
function savePage(tabId) {
    const captureDetails = { tabId };
    chrome.pageCapture.saveAsMHTML(captureDetails, (mhtmlBlob) => {
        // Optional: chrome.tabs.remove(tabId); // to close the tab
        const downloadOptions = {
            url: URL.createObjectURL(mhtmlBlob),
            filename: 'whatever.mhtml'
        };
        chrome.downloads.download(downloadOptions);
    });
}



I've tried different approaches here to solve this old code:

1st Try :::

  • I added this function blobToBase64 instead of URL.createObjectURL()
  • The resulting .MHTML file opens as a blank webpage, unlike the file saved by Chrome's Ctrl+S (Save As), which opens perfectly.

service-worker.js:

/**
 * Opens `url` in a new tab and, once that tab reports status 'complete',
 * captures it as MHTML and passes the capture to chrome.downloads.download
 * as a data URL under the fixed filename 'whatever.mhtml'.
 *
 * @param {string} url - Address to open in the new tab.
 */
function saveAsPage(url) {
    //Added code start
    // Promise wrapper around FileReader.readAsDataURL: resolves with
    // reader.result on load, rejects with whatever the error handler receives.
    const blobToBase64 = (blob) => new Promise((resolve, reject) => {
        const fileReader = new FileReader();
        fileReader.readAsDataURL(blob);
        fileReader.onload = () => {
            resolve(fileReader.result);
        };
        fileReader.onerror = (error) => {
            reject(error);
        };
    });
    //Added code end

    // Captures the tab and starts the download of the resulting data URL.
    const savePage = (tabId) => {
        chrome.pageCapture.saveAsMHTML({ tabId }, async (blob) => {

            //var url = URL.createObjectURL(blob); //not working in manifest v3 service worker

            const dataUrl = await blobToBase64(blob); //Added line code

            chrome.downloads.download({ url: dataUrl, filename: 'whatever.mhtml' });
        });
    };

    chrome.tabs.create({ url }, (tab) => {
        const onTabUpdated = (tabId, changeInfo) => {
            // Skip updates for other tabs and for any status other than 'complete'.
            if (tabId != tab.id || changeInfo.status != 'complete') {
                return;
            }
            chrome.tabs.onUpdated.removeListener(onTabUpdated);
            // tab.title is passed along although savePage only declares tabId.
            savePage(tabId, tab.title);
        };
        chrome.tabs.onUpdated.addListener(onTabUpdated);
    });

}


# 2nd Try :::

  • This time, I tried sending the blob from the service worker to the content script, to be converted to a URL there using URL.createObjectURL(). But the blob received in the content script is corrupted. I think this is due to its large size: it is over 2MB.

  • I also tried saveAs() from FileSaver.js, but the blob received is already corrupted. When I log it to the console, it is different from the one logged in the service worker console.

  • I have an error message on the console of the webpage from the content-script.js. The same error type is shown when using saveAs() from FileSaver.js:

Error in event handler: TypeError: Failed to execute 'createObjectURL' on 'URL': Overload resolution failed. at chrome-extension://ljoihpchcghncfcnfoflfbenhihjdomk/content-script.js:5:34

Here is the code:

service-worker.js:

/**
 * Opens `url` in a new tab; once that tab reports status 'complete', captures
 * it as MHTML, sends the capture to the same tab via chrome.tabs.sendMessage
 * and downloads the `url` field of the reply as 'whatever.mhtml'.
 *
 * @param {string} url - Address to open in the new tab.
 */
function saveAsPage(url) {

    // tabTitle is accepted but not used inside this helper.
    const savePage = async (tabId, tabTitle) => {
        chrome.pageCapture.saveAsMHTML({ tabId }, async (blob) => {

            const reply = await chrome.tabs.sendMessage(tabId, { blob }); //Added line code

            chrome.downloads.download({ url: reply.url, filename: 'whatever.mhtml' });
        });
    };

    chrome.tabs.create({ url }, (tab) => {
        const onTabUpdated = (tabId, changeInfo) => {
            // Skip updates for other tabs and for any status other than 'complete'.
            if (tabId != tab.id || changeInfo.status != 'complete') {
                return;
            }
            chrome.tabs.onUpdated.removeListener(onTabUpdated);
            savePage(tabId, tab.title);
        };
        chrome.tabs.onUpdated.addListener(onTabUpdated);
    });

}

content-script.js:

chrome.runtime.onMessage.addListener(
    function (request, sender, sendResponse) {
        if (request.blob != undefined) { // loose != also skips a null blob
            //saveAS(request.blob); // From FileSaver.js
            var url = window.URL.createObjectURL(request.blob);
            // NOTE(review): the call above is line 5 of this script, the position
            // named in the quoted TypeError. Extension messages are presumably
            // JSON-serialized, so request.blob may arrive as a plain object
            // rather than a Blob — confirm against the messaging documentation.
            // Reply to the sender with the object URL.
            sendResponse({ url: url });
        }
        // This listener does not call sendResponse for messages without `blob`.

    }
);

  • I use this function from service worker console for testing:
// Manual test call, run from the service worker console.
saveAsPage("https://www.babaloo.gr/product-category/epohiakaeidi/thermansi/aerotherma/");

  • manifest.json
{
    "manifest_version": 3,
    "name": "Test",
    "description": "",
    "version": "0.0.1",
    "icons": {
        "16": "logo/logo-16.png",
        "48": "logo/logo-48.png",
        "128": "logo/logo-128.png"
    },
    "permissions": [
        "activeTab",
        "pageCapture",
        "downloads",
        "tabs",
        "nativeMessaging"
    ],
    "host_permissions": [
        "*://*/*"
    ],
    "background": {
        "service_worker": "service-worker.js"
    },
    "content_scripts": [
        {
            "js": [
                "FileSaver.js",
                "content-script.js"
            ],
            "matches": [
                "*://*/*"
            ]
        }
    ]
}

Solution

  • This is an implementation using offscreen.

    manifest.json

    {
      "name": "chrome.pageCapture",
      "version": "1.0",
      "manifest_version": 3,
      "permissions": [
        "pageCapture",
        "downloads",
        "offscreen"
      ],
      "background": {
        "service_worker": "background.js"
      },
      "action": {
        "default_title": "hoge"
      }
    }
    

    background.js

    // In-flight createDocument() call, shared so that overlapping clicks do not
    // try to create a second offscreen document.
    let creatingOffscreenDocument = null;

    /**
     * Makes sure the offscreen document (offscreen.html) exists.
     *
     * Only one offscreen document may be open per extension, so creating it
     * again on a later click fails; this checks for an existing one first.
     *
     * @returns {Promise<void>} Settles once the document is available.
     */
    async function ensureOffscreenDocument() {
      const offscreenUrl = chrome.runtime.getURL("offscreen.html");
      const contexts = await chrome.runtime.getContexts({
        contextTypes: ["OFFSCREEN_DOCUMENT"],
        documentUrls: [offscreenUrl],
      });
      if (contexts.length > 0) {
        return;
      }
      if (creatingOffscreenDocument === null) {
        creatingOffscreenDocument = chrome.offscreen
          .createDocument({
            url: offscreenUrl,
            reasons: ["BLOBS"],
            justification: "justification is required.",
          })
          .finally(() => {
            creatingOffscreenDocument = null;
          });
      }
      await creatingOffscreenDocument;
    }

    /**
     * Toolbar-button handler: captures the clicked tab as MHTML, asks the
     * offscreen document to turn the text into a blob: URL (URL.createObjectURL
     * is not available in the service worker) and downloads it as "hoge.mhtml".
     */
    chrome.action.onClicked.addListener(async (tab) => {
      try {
        await ensureOffscreenDocument();
        // onClicked already supplies the tab that was active when the button
        // was clicked, so no chrome.tabs.query round trip is needed.
        const md = await chrome.pageCapture.saveAsMHTML({ tabId: tab.id });
        if (!md) {
          throw new Error("pageCapture.saveAsMHTML returned no data");
        }
        const mdText = await md.text();
        const response = await chrome.runtime.sendMessage({ mdText });
        if (!response || !response.url) {
          throw new Error("offscreen document did not return a URL");
        }
        await chrome.downloads.download({
          url: response.url,
          filename: "hoge.mhtml",
        });
      } catch (error) {
        console.error("Saving the page as MHTML failed:", error);
      }
    });
    

    offscreen.html

    <!DOCTYPE html>
    <html>
    <head>
      <meta charset="utf-8">
      <title>Offscreen document</title>
    </head>
    <body>
      <!-- Loads offscreen.js into this document. -->
      <script src="offscreen.js"></script>
    </body>
    </html>
    

    offscreen.js

    /**
     * Answers runtime messages of the form { mdText: string } with
     * { url: string }, where url is an object URL for a "text/mhtml" Blob
     * built from mdText.
     */
    chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
      // Ignore messages of any other shape, so this listener neither builds a
      // blob from `undefined` nor answers on behalf of another handler.
      if (!message || typeof message.mdText !== "string") {
        return false;
      }
      const md = new Blob([message.mdText], { type: "text/mhtml" });
      sendResponse({ url: URL.createObjectURL(md) });
      // The reply was sent synchronously, so the message channel does not need
      // to be kept open.
      return false;
    });