Search code examples
google-apps-script, google-docs

Get All Links in a Document


Given a "normal document" in Google Docs/Drive (e.g. paragraphs, lists, tables) which contains external links scattered throughout the content, how do you compile a list of links present using Google Apps Script?

Specifically, I want to update all broken links in the document by searching for oldText in each URL and replacing it with newText, changing only the link URL and not the displayed text.

I don't think the replacing text section of the Dev Documentation is what I need -- do I need to scan every element of the doc? Can I just editAsText and use an html regex? Examples would be appreciated.


Solution

  • This is only mostly painful! Code is available as part of a gist.

    Screenshot. (Yeah, I can't spell.)

    getAllLinks

    Here's a utility function that scans the document for all LinkUrls, returning them in an array.

    /**
     * Get an array of all LinkUrls in the document. The function is
     * recursive, and if no element is provided, it will default to
     * the active document's Body element.
     *
     * Within a Text element, every maximal run of consecutive characters
     * that share the same URL is reported as one link. A run of a single
     * character has startOffset === endOffsetInclusive, and two adjacent
     * links with different URLs are reported separately.
     *
     * @param {Element} [element] The document element to operate on.
     *                            Defaults to the active document's Body.
     *
     * @returns {Array}         Array of objects, vis
     *                              {element,
     *                               startOffset,
     *                               endOffsetInclusive,
     *                               url}
     */
    function getAllLinks(element) {
      var links = [];
      element = element || DocumentApp.getActiveDocument().getBody();

      if (element.getType() === DocumentApp.ElementType.TEXT) {
        var textObj = element.editAsText();
        var length = element.getText().length;
        var current = null; // the link run being accumulated, if any

        for (var ch = 0; ch < length; ch++) {
          var rawUrl = textObj.getLinkUrl(ch);
          var url = (rawUrl == null) ? null : String(rawUrl); // grab a copy

          if (current !== null && url === current.url) {
            // Still inside the same link: extend the run.
            current.endOffsetInclusive = ch;
            continue;
          }

          // The run (if any) ended here, either because the link stopped
          // or because a different link starts on this character.
          if (current !== null) {
            links.push(current);
            current = null;
          }

          if (url !== null) {
            // Start a new run. The end offset is initialised right away so
            // that a one-character link still has a valid range.
            current = {
              element: element,
              url: url,
              startOffset: ch,
              endOffsetInclusive: ch
            };
          }
        }

        if (current !== null) {
          // in case the link ends on the same char that the element does
          links.push(current);
        }
      }
      else if (typeof element.getNumChildren === 'function') {
        // Only container elements can be descended into; leaf elements
        // other than Text expose no getNumChildren and hold no link text.
        var numChildren = element.getNumChildren();
        for (var i = 0; i < numChildren; i++) {
          links = links.concat(getAllLinks(element.getChild(i)));
        }
      }

      return links;
    }
    

    findAndReplaceLinks

    This utility builds on getAllLinks to do a find & replace function.

    /**
     * Replace all or part of UrlLinks in the document.
     *
     * The search pattern is always applied as a regular expression, both
     * when deciding whether a link matches and when building the new URL.
     * A string pattern is compiled with `new RegExp(searchPattern)`.
     *
     * @param {String|RegExp} searchPattern the regex pattern to search for
     * @param {String} replacement          the text to use as replacement
     *
     * @returns {Number}                number of Urls changed
     */
    function findAndReplaceLinks(searchPattern,replacement) {
      var links = getAllLinks();
      var numChanged = 0;

      // String.prototype.match treats a string argument as a regex while
      // String.prototype.replace treats it as a literal. Compiling once
      // makes both steps agree on what "matches".
      var pattern = (searchPattern instanceof RegExp)
          ? searchPattern
          : new RegExp(searchPattern);

      for (var l = 0; l < links.length; l++) {
        var link = links[l];
        if (link.url.match(pattern)) {
          // This link needs to be changed
          var newUrl = link.url.replace(pattern, replacement);
          // A link entry may lack an end offset (a one-character link);
          // in that case the range is just the start character.
          var endOffset = (link.endOffsetInclusive == null)
              ? link.startOffset
              : link.endOffsetInclusive;
          link.element.setLinkUrl(link.startOffset, endOffset, newUrl);
          numChanged++;
        }
      }
      return numChanged;
    }
    

    Demo UI

    To demonstrate the use of these utilities, here are a couple of UI extensions:

    /**
     * Adds a 'Utils' menu to the document UI with two entries:
     * 'List Links' (runs sidebarLinks) and 'Replace Link Text'
     * (runs searchReplaceLinks).
     */
    function onOpen() {
      var menu = DocumentApp.getUi().createMenu('Utils');
      menu = menu.addItem('List Links', 'sidebarLinks');
      menu = menu.addItem('Replace Link Text', 'searchReplaceLinks');
      menu.addToUi();
    }
    
    /**
     * Builds a 250x100 UiApp dialog titled 'Change Url text' and shows it
     * in the document UI. The dialog holds a form with two text boxes,
     * named "searchPattern" and "replacement", and a Submit button that
     * has the 'myClickHandler' server handler attached as a click handler.
     */
    function searchReplaceLinks() {
      var docUi = DocumentApp.getUi();
      var dialog = UiApp.createApplication()
                        .setWidth(250)
                        .setHeight(100)
                        .setTitle('Change Url text');
      var formPanel = dialog.createFormPanel();
      var fields = dialog.createFlowPanel();

      // Adds one "caption + named text box" row to the flow panel.
      var addField = function (caption, fieldName) {
        fields.add(dialog.createLabel(caption));
        fields.add(dialog.createTextBox().setName(fieldName));
      };
      addField("Find: ", "searchPattern");
      addField("Replace: ", "replacement");

      var closeHandler = dialog.createServerHandler('myClickHandler');
      var submit = dialog.createSubmitButton("Submit");
      fields.add(submit.addClickHandler(closeHandler));

      formPanel.add(fields);
      dialog.add(formPanel);
      docUi.showDialog(dialog);
    }
    
    /**
     * Click handler that closes the active UiApp application.
     *
     * @param {Object} e  The handler event (not used).
     * @returns {Object}  The active UiApp application, after close() was
     *                    called on it.
     */
    function myClickHandler(e) {
      var activeDialog = UiApp.getActiveApplication();
      activeDialog.close();
      return activeDialog;
    }
    
    /**
     * Handles the submitted find/replace form: rewrites the matching link
     * URLs, refreshes the sidebar list of links, and then reports how many
     * URLs were changed in an alert.
     *
     * @param {Object} e  Event object; e.parameter.searchPattern and
     *                    e.parameter.replacement are passed on to
     *                    findAndReplaceLinks.
     */
    function doPost(e) {
      var params = e.parameter;
      var numChanged = findAndReplaceLinks(params.searchPattern, params.replacement);

      sidebarLinks(); // Update list

      // The alert's return value (which button was pressed) is not needed.
      var ui = DocumentApp.getUi();
      ui.alert(
          'Results',
          "Changed "+numChanged+" urls.",
          ui.ButtonSet.OK);
    }
    
    
    /**
     * Shows a custom HTML user interface in a sidebar in the Google Docs editor.
     *
     * The sidebar is titled 'URL Links' and lists the URL of every link
     * returned by getAllLinks(), one paragraph per link.
     */
    function sidebarLinks() {
      var links = getAllLinks();
      var sidebar = HtmlService
              .createHtmlOutput()
              .setTitle('URL Links')
              .setWidth(350 /* pixels */);

      // Display list of links, url only.
      for (var l = 0; l < links.length; l++) {
        // URLs come from document content, so they are appended with
        // appendUntrusted (escaped) rather than spliced into raw markup.
        sidebar.append('<p>');
        sidebar.appendUntrusted(links[l].url);
        sidebar.append('</p>');
      }

      DocumentApp.getUi().showSidebar(sidebar);
    }