Search code examples
javascriptjquerypdfpdf.jsadobe-reader

Show Searched text of a pdf along with page number


I am looking for a solution where I have a PDF file and want to search for a particular text in it; the results of that search should be shown in a list, each with its page number. I looked for a solution online but was unable to find a proper one.

A similar feature is available in Adobe Reader, called "comments", where the user can view all the search results in a list along with their page numbers.

An answer would be really helpful, and if possible, please provide an example too.

Thank you in advance.


Solution

  • Here is an example that might help you display the found text, grouped per page, using PDF.js.

    // Text to look for; matched literally and case-insensitively by searchPage.
    var searchText = "JavaScript";

    /**
     * Searches one page of a document for `searchText`.
     *
     * The page's text items are concatenated (without separators) and every
     * occurrence of the search text is reported together with up to 20
     * characters of context on each side. "..." is added on a side only when
     * that side has context.
     *
     * @param {Object} doc - Object exposing `getPage(pageNumber)`, which must
     *   return a promise for a page that exposes `getTextContent()`.
     * @param {number} pageNumber - Page number passed through to `doc.getPage`.
     * @returns {Promise<{page: number, items: string[]}>} The page number and
     *   one snippet per occurrence (empty when nothing matches or when the
     *   search text is empty).
     */
    function searchPage(doc, pageNumber) {
      return doc.getPage(pageNumber).then(function (page) {
        return page.getTextContent();
      }).then(function (content) {
        var lines = [];
        var query = searchText == null ? "" : String(searchText);
        // An empty query would match the empty string everywhere and the exec
        // loop below would never advance, so report "no matches" instead.
        if (query === "") {
          return {page: pageNumber, items: lines};
        }
        // Search combined text content using regular expression
        var text = content.items.map(function (i) { return i.str; }).join('');
        // Escape regex metacharacters so that queries such as "C++" or "a.c"
        // are matched literally instead of throwing or over-matching.
        var escaped = query.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        // The trailing context sits in a lookahead so it is not consumed:
        // an occurrence that follows closely after another is still found.
        var re = new RegExp("(.{0,20})(" + escaped + ")(?=(.{0,20}))", "gi"), m;
        while ((m = re.exec(text)) !== null) {
          var line = (m[1] ? "..." : "") + m[1] + m[2] + m[3] + (m[3] ? "..." : "");
          lines.push(line);
        }
        return {page: pageNumber, items: lines};
      });
    }
    
    // Start loading the sample document; the steps below run once it is available.
    var loading = PDFJS.getDocument("//cdn.mozilla.net/pdfjs/tracemonkey.pdf");

    loading.promise.then(function (pdf) {
      // Launch one search per page (1 through numPages) and wait for all of them.
      var pending = [];
      var pageNo = 1;
      while (pageNo <= pdf.numPages) {
        pending.push(searchPage(pdf, pageNo));
        pageNo += 1;
      }
      return Promise.all(pending);
    }).then(function (pages) {
      // Render a heading div per page with one nested div per found snippet.
      pages.forEach(function (entry) {
        var heading = document.createElement('div');
        heading.className = "pr";
        document.body.appendChild(heading);
        heading.textContent = 'Page ' + entry.page + ':';

        entry.items.forEach(function (snippet) {
          var row = document.createElement('div');
          row.className = "prl";
          heading.appendChild(row);
          row.textContent = snippet;
        });
      });
    }).catch(console.error);
    /* Per-page heading div created by the script: bold sans-serif text */
    .pr { font-family: sans-serif; font-weight: bold; }
    /* Snippet div nested inside a heading: italic, with the bold weight reset */
    .prl { font-style: italic; font-weight: normal; }
    <script src="//npmcdn.com/pdfjs-dist/build/pdf.js"></script>