javascript google-apps-script google-docs-api

Extra spaces when inserting text in Google Docs tables - rich text version

Pesky Spaces

As has been noted here and here on SO, Google Apps Script has a propensity to add in random new lines to tables and other elements (although the character in question seems to be \10, as noted in the first link.) The hack noted in the first link (and implemented in the code below) works fine if you don't have any Text elements with rich formatting, but I am having trouble extending the hack and would appreciate any direction.

my use case

In my case, I am making an Nx2 table in which the left column contains English text and the right column Hebrew. The Hebrew needs to be added dynamically with insertParagraph() in order to allow for the proper left-to-right settings, but doing so adds the aforementioned random break character. Using the hack and then applying the proper attributes to the modified text for some reason does nothing to the first row while adding a newline back to the second column, while changing the l-t-r direction as intended.

code (edited)

I am realizing that my MRE isn't really an MRE, so here's something a bit closer to the code I am working with. insertRichTextFromHTML converts an HTML string to Google Docs rich-text format. The data set I am working with comes from Sefaria, a repository of Jewish texts. Note that the code integrates @Tanaike's suggestions above; for some reason it still doesn't work as expected:

function insertReference() {
  let reference = "Shemot 12:2-4";
  let url = 'http://www.sefaria.org/api/texts/'+reference+'?commentary=0&context=0';
  let response = UrlFetchApp.fetch(url);
  let json = response.getContentText();
  let data = JSON.parse(json);
  let doc = DocumentApp.getActiveDocument().getBody();
  let index = DocumentApp.getActiveDocument().getBody().getNumChildren()-1;

    let cells = [
      ["", ""],
      ["", ""]
    ];
    let tableStyle = {};
        tableStyle[DocumentApp.Attribute.BOLD] = false;
    let table = doc.insertTable(index, cells)

    table.setAttributes(tableStyle);

    let engTitle = table.getCell(0, 0)
      .setText("")
      .insertParagraph(0, "")
      .setLeftToRight(true)
      .editAsText();
    insertRichTextFromHTML(engTitle, data.ref);

    let hebTitle = table.getCell(0, 1)
      .setText("")
      .insertParagraph(0, "")
      .setLeftToRight(false)
      .editAsText();
    insertRichTextFromHTML(hebTitle, data.heRef);

    let engText = table.getCell(1, 0)
      .setText("")
      .insertParagraph(0, "")
      .setLeftToRight(true)
      .editAsText();
    insertRichTextFromHTML(engText, data.text);

    let hebText = table.getCell(1, 1)
      .setText("")
      .insertParagraph(0, "")
      .setLeftToRight(false)
      .editAsText();
    insertRichTextFromHTML(hebText, data.he);
}
function insertRichTextFromHTML(element, htmlString) {
  let buf = [];
  let index = 0, italicsFnCount = 0, textLength = element.editAsText().getText().length;
  let bolded = false, italicized = false, inFootnote = false;
  if (Array.isArray(htmlString)) {
    htmlString = htmlString.join("");
  }
  let iterableString = htmlString.split(/(<\/?[a-zA-Z]+[a-zA-Z'"0-9= \-/]*>)/g);

  let inserterFn = (textModification) => {
    //grab all words in the buffer and join
    let snippet = buf.join("");

    //index of snippet needs to be zero-indexed. This is how we keep track of which words/phrases/sentences to bold/italicize
    let snippetLength = snippet.length;
    let snippetIndex = snippetLength - 1;

    if (snippet != "") {
      element.insertText(textLength, snippet);

      //set rich text settings
      element.setBold(textLength, textLength+snippetIndex, bolded); 
      element.setItalic(textLength, textLength+snippetIndex, italicized);

      textLength += snippetLength;
    }

    switch(textModification) {
      case "bold":
        bolded = !bolded;
        break;
      case "italic":
        italicized = !italicized;
        break;
      case "linebreak":
        element.insertText(textLength, "\n");
        textLength += 1;
        break;
    }
  }

  for (let i = 0; i < iterableString.length; i++) {
    let word = iterableString[i];

    /* example format of footnotes in the text: -----‘Do not let me see your faces<sup class=\"footnote-marker\">*</sup><i class=\"footnote\"><b>Do not let me see your faces </b>See note at v. 3.</i> unless----*/
    if (inFootnote) {
      if ( word == "<i class=\"footnote\">" || word == "<i>") {
        italicsFnCount++;
      } else if ( word == "</i>") {
        italicsFnCount--;
        if (italicsFnCount == 0) {
          inFootnote = false;
          continue;
        }
      }

    }

    else if (word[0] == "<") {
      //we have a tag; grab the name of the tag
      let tagName = /<\/?([a-zA-Z]+)([a-zA-Z'"0-9= \-/])*>/.exec(word)[1];

      switch (tagName) {
        case "b":
          inserterFn("bold");
          buf = [];
          index = 0;
          break;
        case "strong":
          inserterFn("bold");
          buf = [];
          index = 0;
          break;
        case "i":
          inserterFn("italic");
          buf = [];
          index = 0;
          break;
        case "br":
          inserterFn("linebreak");
          buf = [];
          index = 0;
          break;
        case "sup":
          inFootnote = true;
          // yes, this fires even at </sup>, but that is of no consequence for the logic
          italicsFnCount = 0;
          break;
        default:
          break;
      }
      continue;
    }

    if (!inFootnote) {
      buf[index++] = word;
    }
  }

  // add in the last words, if the text snippet does not end with a tag
  let snippet = buf.join("");
  if ( snippet != "" ) {
    element.insertText(textLength, snippet);
    let snippetIndex = snippet.length - 1;
    element.setBold(textLength, textLength+snippetIndex, false); 
    element.setItalic(textLength, textLength+snippetIndex, false);
  }
}

Current output: Desired output:

Solution

From your updated question, although I'm not so sure if I understand it correctly, how about modifying insertReference() as follows:

### From:
        let hebText = table.getCell(1, 1)
          .setText("")
          .insertParagraph(0, "")
          .setLeftToRight(false)
          .editAsText();
        insertRichTextFromHTML(hebText, data.he);
    }

### To:
      let hebText = table.getCell(1, 1)
        .setText("")
        .insertParagraph(0, "")
        .setLeftToRight(false)
        .editAsText();
      insertRichTextFromHTML(hebText, data.he);

      // I added the below script.
      for (let r = 0; r < table.getNumRows(); r++) {
        const row = table.getRow(r);
        for (let c = 0; c < row.getNumCells(); c++) {
          const cell = row.getCell(c);
          const n = cell.getNumChildren();
          cell.getChild(n - 1).removeFromParent();
        }
      }
    }

or

      let hebText = table.getCell(1, 1)
        .setText("")
        .insertParagraph(0, "")
        .setLeftToRight(false)
        .editAsText();
      insertRichTextFromHTML(hebText, data.he);

      // I added the below script.
      for (let r = 0; r < table.getNumRows(); r++) {
        const row = table.getRow(r);
        for (let c = 0; c < row.getNumCells(); c++) {
          const cell = row.getCell(c);
          const n = cell.getNumChildren();
          for (let t = 0; t < n; t++) {
            const child = cell.getChild(t);
            if (child.asParagraph().getText().trim() == "") {
              child.removeFromParent();
            }
          }
        }
      }
    }

I guessed that in your added script, the 1st insertParagraph(0, "") might be the reason for your current issue. So, I added the script for removing it. But, I do not know your actual script. So, when you use another script again, you might not be able to use the above script. Please be careful about this.