Search code examples
javascriptgoogle-chrome-extension

How can I get selected text in pdf in Javascript?


I'm writing a Chrome extension that manipulates PDF files, so I want to get the text the user has selected in the PDF. How can I do that?

Something like this:

enter image description here


Solution

  • You can use the internal undocumented commands of the built-in PDF viewer.

    Here's an example of a content script:

    /**
     * Asks the embedded PDF viewer for the currently selected text.
     *
     * Registers a one-shot `message` listener on `window`, then injects a
     * <script> element so that page-context code posts a `getSelectedText`
     * request to the first <embed> element.
     *
     * @returns {Promise<*>} Resolves with the `selectedText` field of the first
     *   `getSelectedTextReply` message that arrives from the PDF viewer origin.
     */
    function getPdfSelectedText() {
      // Only replies coming from this origin are accepted.
      const PDF_VIEWER_ORIGIN = 'chrome-extension://mhjfbmdgcfjbbpaeojofohoefgiehjai';

      return new Promise(resolve => {
        const handleReply = event => {
          if (event.origin !== PDF_VIEWER_ORIGIN) {
            return;
          }
          const payload = event.data;
          if (!payload || payload.type !== 'getSelectedTextReply') {
            return;
          }
          // One reply is enough: detach before resolving.
          window.removeEventListener('message', handleReply);
          resolve(payload.selectedText);
        };
        window.addEventListener('message', handleReply);

        // The request has to be posted from the page context, which is where
        // postMessage of the embedded plugin is accessible.
        const injector = document.createElement('script');
        const usesExternalScript = chrome.runtime.getManifest().manifest_version > 2;
        if (usesExternalScript) {
          // Manifest versions above 2 load the request code from an extension file.
          injector.src = chrome.runtime.getURL('query-pdf.js');
        } else {
          // Otherwise the request code is inlined as an immediately-invoked function.
          injector.textContent = `(${() => {
            document.querySelector('embed').postMessage({type: 'getSelectedText'}, '*');
          }})()`;
        }
        document.documentElement.appendChild(injector);
        injector.remove();
      });
    }
    
    // Content-script entry point: answers 'getPdfSelection' requests with the
    // text currently selected in the PDF viewer. Any other message is ignored.
    chrome.runtime.onMessage.addListener((request, _sender, reply) => {
      if (request !== 'getPdfSelection') {
        return undefined;
      }
      getPdfSelectedText().then(reply);
      // Returning true tells the messaging API that the reply is sent
      // asynchronously, once the promise above settles.
      return true;
    });
    

    This example assumes you send a message from the popup or background script:

    // Look up the active tab of the current window and ask its content script
    // for the PDF selection.
    chrome.tabs.query({active: true, currentWindow: true}, tabs => {
      const activeTab = tabs[0];
      chrome.tabs.sendMessage(activeTab.id, 'getPdfSelection', sel => {
        // do something
      });
    });
    

    See also How to open the correct devtools console to see output from an extension script?

    ManifestV3 extensions also need this:

    • manifest.json should expose query-pdf.js

        "web_accessible_resources": [{
          "resources": ["query-pdf.js"],
          "matches": ["<all_urls>"],
          "use_dynamic_url": true
        }]
      
    • query-pdf.js

      // Posts a 'getSelectedText' request to the first <embed> element.
      // The block scope keeps the helper binding out of the global scope.
      {
        const pdfEmbed = document.querySelector('embed');
        pdfEmbed.postMessage({type: 'getSelectedText'}, '*');
      }