Search code examples
javascript tree traversal

How to convert an array of text segments into a DOM tree object?


Let's edit a text block in Figma as shown in the image:

enter image description here

Figma Plugin API gives the following segments for this text block:

// Styled text runs returned by the Figma Plugin API for the example text block.
// Every entry carries:
//   characters  - the run's raw text; "\n" marks a line break
//   fontWeight  - 400 or 700 in this sample
//   listOptions - { type } with "NONE", "UNORDERED" or "ORDERED"
//   indentation - nesting depth; 0 for every "NONE" run in this sample
//   hyperlink   - null, or { type: "URL", value } for linked text
const segments = [
  { "characters": "Lorem ", "fontWeight": 400, "listOptions": { "type": "NONE" }, "indentation": 0, "hyperlink": null },
  { "characters": "Ipsum", "fontWeight": 700, "listOptions": { "type": "NONE" }, "indentation": 0, "hyperlink": null },
  { "characters": " is \nsimply dummy text of \n", "fontWeight": 400, "listOptions": { "type": "NONE" }, "indentation": 0, "hyperlink": null },
  { "characters": "the printing and \n", "fontWeight": 400, "listOptions": { "type": "UNORDERED" }, "indentation": 1, "hyperlink": null },
  { "characters": "typesetting \n", "fontWeight": 400, "listOptions": { "type": "UNORDERED" }, "indentation": 2, "hyperlink": null },
  { "characters": "industry. \n", "fontWeight": 400, "listOptions": { "type": "UNORDERED" }, "indentation": 1, "hyperlink": null },
  { "characters": "Lorem Ipsum has been the ", "fontWeight": 400, "listOptions": { "type": "NONE" }, "indentation": 0, "hyperlink": null },
  { "characters": "industry's standard", "fontWeight": 400, "listOptions": { "type": "NONE" }, "indentation": 0, "hyperlink": { "type": "URL", "value": "http://example.com" } },
  { "characters": " dummy text ever since the 1500s, \n", "fontWeight": 400, "listOptions": { "type": "NONE" }, "indentation": 0, "hyperlink": null },
  { "characters": "when an unknown \n", "fontWeight": 400, "listOptions": { "type": "ORDERED" }, "indentation": 1, "hyperlink": null },
  { "characters": "printer took \na galley of \ntype and \n", "fontWeight": 400, "listOptions": { "type": "UNORDERED" }, "indentation": 2, "hyperlink": null },
  { "characters": "scrambled it\n", "fontWeight": 400, "listOptions": { "type": "ORDERED" }, "indentation": 1, "hyperlink": null },
  { "characters": "\nto make a type\n\n", "fontWeight": 400, "listOptions": { "type": "NONE" }, "indentation": 0, "hyperlink": null },
  { "characters": "specimen book.\n", "fontWeight": 400, "listOptions": { "type": "UNORDERED" }, "indentation": 1, "hyperlink": null },
  { "characters": "It has survived\n", "fontWeight": 400, "listOptions": { "type": "ORDERED" }, "indentation": 3, "hyperlink": null },
  { "characters": "not only\nfive centuries,", "fontWeight": 400, "listOptions": { "type": "ORDERED" }, "indentation": 2, "hyperlink": null }
]

Since the list is long, let's simplify it a bit:

// Condensed form of the Figma segments, one entry per styled text run:
//   ind   - indentation (list nesting depth, 0 outside of lists)
//   list  - "UL" / "OL" for list runs, null for plain text
//   chars - the run's raw text; "\n" marks a line break
//   bold  - true for the bold run
//   link  - target URL for linked text, otherwise null
const segments = [
  { ind: 0, list: null, chars: "Lorem ", bold: false, link: null },
  { ind: 0, list: null, chars: "Ipsum", bold: true, link: null },
  { ind: 0, list: null, chars: " is \nsimply dummy text of \n", bold: false, link: null },
  { ind: 1, list: "UL", chars: "the printing and \n", bold: false, link: null },
  { ind: 2, list: "UL", chars: "typesetting \n", bold: false, link: null },
  { ind: 1, list: "UL", chars: "industry. \n", bold: false, link: null },
  { ind: 0, list: null, chars: "Lorem Ipsum has been the ", bold: false, link: null },
  { ind: 0, list: null, chars: "industry's standard", bold: false, link: "http://example.com" },
  { ind: 0, list: null, chars: " dummy text ever since the 1500s, \n", bold: false, link: null },
  { ind: 1, list: "OL", chars: "when an unknown \n", bold: false, link: null },
  { ind: 2, list: "UL", chars: "printer took \na galley of \ntype and \n", bold: false, link: null },
  { ind: 1, list: "OL", chars: "scrambled it\n", bold: false, link: null },
  { ind: 0, list: null, chars: "\nto make a type\n\n", bold: false, link: null },
  { ind: 1, list: "UL", chars: "specimen book.\n", bold: false, link: null },
  { ind: 3, list: "OL", chars: "It has survived\n", bold: false, link: null },
  // No leading space here: the text must match the raw Figma run it condenses.
  { ind: 2, list: "OL", chars: "not only\nfive centuries,", bold: false, link: null }
]

I'm trying to take this segment data and convert it into an HTML tree with JavaScript. The output should be as follows:

<span>Lorem </span>
<strong>Ipsum</strong> 
<span> is <br>simply dummy text of </span>
<ul>
  <li>
    <span>the printing and </span>
  </li>
  <ul>
    <li><span>typesetting </span></li>
  </ul>
  <li><span>industry. </span></li>
</ul>
<span>Lorem Ipsum has been the </span>
<a href="http://example.com">industry's standard</a>
<span> dummy text ever since the 1500s, </span>
<ol>
  <li><span>when an unknown </span></li>
  <ul>
    <li><span>printer took </span></li>
    <li><span>a galley of </span></li>
    <li><span>type and </span></li>
  </ul>
  <li><span>scrambled it</span></li>
</ol>
<span>to make a type</span>
<ul>
  <li>
    <span>specimen book.</span>
  </li>
  <ol>
    <ol>
      <li><span>It has survived</span></li>
    </ol>
    <li><span>not only</span></li>
    <li><span>five centuries,</span></li>
  </ol>
</ul>

I tried:

/**
 * Wraps one run of text in a `<span>`.
 *
 * A single trailing line break is dropped and every remaining line break
 * becomes a `<br>`. The text itself is not HTML-escaped.
 *
 * @param chars - raw characters of the run
 * @returns the opening tag, the converted text and the closing tag, in that
 *   order, so callers can spread them into a list of output lines
 */
function getPureSegment(chars: string): string[] {
  const text = chars.endsWith("\n") ? chars.slice(0, -1) : chars
  // `replace` with a global regex is equivalent to `replaceAll` here and,
  // unlike `replaceAll` (ES2021), is available on every compile target.
  return ["<span>", text.replace(/\n/g, "<br>"), "</span>"]
}

/**
 * Maps a segment's list type to the tag that opens such a list.
 *
 * @param segment - object exposing `listOptions.type`
 * @returns "<ol>" for "ORDERED", "<ul>" for "UNORDERED", otherwise undefined
 */
function getOpeningListTag(segment) {
  switch (segment.listOptions.type) {
    case "ORDERED":
      return "<ol>"
    case "UNORDERED":
      return "<ul>"
    default:
      // Any other type (e.g. "NONE") has no list tag.
      return undefined
  }
}

/**
 * Maps a segment's list type to the tag that closes such a list.
 *
 * @param segment - object exposing `listOptions.type`
 * @returns "</ol>" for "ORDERED", "</ul>" for "UNORDERED", otherwise undefined
 */
function getClosingListTag(segment) {
  switch (segment.listOptions.type) {
    case "ORDERED":
      return "</ol>"
    case "UNORDERED":
      return "</ul>"
    default:
      // Any other type (e.g. "NONE") has no list tag.
      return undefined
  }
}

/**
 * Renders Figma text segments as an HTML string, one tag or text chunk per
 * output line.
 *
 * Plain runs (indentation 0, or a list type without a tag) become a `<span>`
 * via `getPureSegment`. List runs become `<li>` items inside `<ol>`/`<ul>`
 * elements; a deeper list is emitted directly after the item that precedes
 * it, i.e. as a sibling of that `<li>`.
 *
 * The open lists are tracked on a stack so that
 *  - every list is closed with the tag it was opened with,
 *  - dropping several indentation levels closes all of those lists,
 *  - jumping several levels deeper opens one list per level,
 *  - every `<li>` is closed before a nested list starts or the list ends,
 *  - every line of a multi-line list run becomes its own item.
 *
 * @param segments - array of objects with `characters`, `indentation` and
 *   `listOptions.type`
 * @returns the markup, joined with "\n"
 */
function getHtml(segments) {
  const html: string[] = []
  // Closing tags of the currently open lists, innermost last.
  const openLists: string[] = []
  // True while an <li> is waiting for further runs of the same line.
  let itemOpen = false

  const closeItem = () => {
    if (itemOpen) {
      html.push("</li>")
      itemOpen = false
    }
  }

  const closeListsDownTo = (depth: number) => {
    closeItem()
    while (openLists.length > depth) {
      const closingTag = openLists.pop()
      if (closingTag !== undefined) html.push(closingTag)
    }
  }

  for (const segment of segments) {
    const isIndented = segment.indentation > 0
    const openingTag = isIndented ? getOpeningListTag(segment) : undefined
    const closingTag = isIndented ? getClosingListTag(segment) : undefined

    if (openingTag === undefined || closingTag === undefined) {
      // Plain text always lives outside of any list.
      closeListsDownTo(0)
      html.push(...getPureSegment(segment.characters))
      continue
    }

    const depth = segment.indentation
    if (openLists.length > depth) closeListsDownTo(depth)
    // A different kind of list at the same depth starts a new list.
    if (openLists.length === depth && openLists[depth - 1] !== closingTag) {
      closeListsDownTo(depth - 1)
    }
    while (openLists.length < depth) {
      closeItem()
      html.push(openingTag)
      openLists.push(closingTag)
    }

    const endsWithBreak = segment.characters.endsWith("\n")
    const lines = segment.characters.split("\n")
    if (endsWithBreak) lines.pop()

    lines.forEach((line, index) => {
      if (!itemOpen) {
        html.push("<li>")
        itemOpen = true
      }
      html.push(...getPureSegment(line))
      // Only a line break ends the item; a run without one may be followed
      // by more runs (e.g. bold or linked text) of the same item.
      if (endsWithBreak || index < lines.length - 1) closeItem()
    })
  }

  closeListsDownTo(0)
  return html.join("\n")
}

Solution

  • A reliable solution can be achieved with a single reduce-based iteration over the OP's computed segments array.

    The markup is aggregated while reduce consumes the array, invoking the reducer function once for each segments item. The reducer function therefore has to keep track of which nested list tags are currently open in the aggregated markup. One way of achieving this is to provide a collector object as the reduce method's initial value; in addition to its result property, it carries all the other data the reducer needs.

    // Sample input for `aggregateMarkup`, one entry per styled text run:
    //   ind (nesting depth), list ("UL" / "OL" / null), chars (raw text),
    //   bold, link (URL or null).
    // The last entry is a single indentation-2 "OL" run holding three lines.
    const segments = [
      { ind: 0, list: null, chars: "Lorem ", bold: false, link: null },
      { ind: 0, list: null, chars: "Ipsum", bold: true, link: null },
      { ind: 0, list: null, chars: " is \nsimply dummy text of \n", bold: false, link: null },
      { ind: 1, list: "UL", chars: "the printing and \n", bold: false, link: null },
      { ind: 2, list: "UL", chars: "typesetting \n", bold: false, link: null },
      { ind: 1, list: "UL", chars: "industry. \n", bold: false, link: null },
      { ind: 0, list: null, chars: "Lorem Ipsum has been the ", bold: false, link: null },
      { ind: 0, list: null, chars: "industry's standard", bold: false, link: "http://example.com" },
      { ind: 0, list: null, chars: " dummy text ever since the 1500s, \n", bold: false, link: null },
      { ind: 1, list: "OL", chars: "when an unknown \n", bold: false, link: null },
      { ind: 2, list: "UL", chars: "printer took \na galley of \ntype and \n", bold: false, link: null },
      { ind: 1, list: "OL", chars: "scrambled it\n", bold: false, link: null },
      { ind: 0, list: null, chars: "\nto make a type\n\n", bold: false, link: null },
      { ind: 1, list: "UL", chars: "specimen book.\n", bold: false, link: null },
      { ind: 2, list: "OL", chars: "It has survived\nnot only\nfive centuries,", bold: false, link: null }
    ];
    // Fold all segments into one markup string; the collector starts out
    // with an empty `result`.
    const { result: markup } = segments.reduce(aggregateMarkup, { result: '' });

    // Render the markup into the test container ...
    document.querySelector('#test').innerHTML = markup;

    // ... and log the raw string as well.
    console.log('markup ...', markup);
    /* Remove the page margin. */
    body { margin: 0; }
    /* Limit the container that receives the rendered markup to half the width. */
    #test { width: 50%; }
    /* NOTE(review): .as-console-wrapper is not defined in this snippet; presumably
       it is the snippet runner's console overlay, sized here to half the width
       and at least full height. Confirm against the hosting page. */
    .as-console-wrapper { left: auto!important; width: 50%; min-height: 100%;  }
    <script>
    /**
     * Closes tracked open lists, innermost first, until at most `depth`
     * of them remain.
     *
     * Tag names are popped from `openTagNames` (the array is mutated), so
     * every list is closed with the tag it was opened with. A list that is
     * nested in another list sits inside that list's still open `<li>`,
     * which therefore gets terminated right after the nested list.
     *
     * @param markup - the markup aggregated so far
     * @param openTagNames - stack of open list tag-names, innermost last
     * @param depth - number of lists that have to stay open
     * @returns the markup extended by the closing tags
     */
    function closeOpenLists(markup, openTagNames, depth) {
      while (openTagNames.length > depth) {
        markup = `${ markup }<\/${ openTagNames.pop() }>`;

        if (openTagNames.length > 0) {
          markup = `${ markup }<\/li>`;
        }
      }
      return markup;
    }

    /**
     * Reducer which aggregates an HTML markup string from text segments.
     *
     * Plain segments become `<span>`s, bold ones `<strong>`, linked ones
     * `<a>`. List segments become `<li>` items (one per line) inside
     * `<ul>`/`<ol>` elements; a deeper list gets nested into the item that
     * precedes it.
     *
     * NOTE: `chars` and `link` get interpolated as-is, they are not
     * HTML-escaped.
     *
     * @param collector - `{ result, openTagNames }`; both are optional,
     *   `openTagNames` tracks the currently open list tags
     * @param segment - `{ ind, list, chars, bold, link }`
     * @param idx - index of the current segment
     * @param segmentArray - the array being reduced
     * @returns the collector for the next iteration step
     */
    function aggregateMarkup(collector, segment, idx, segmentArray) {
      let { openTagNames = [], result = '' } = collector;
      const {
        ind: indentation, list,
        bold: isBold, link, chars = '',
      } = segment;

      const isLink = !!link;
      const isListItem = !!list;

      const isOpeningListItem =
        isListItem && ((segmentArray[idx - 1]?.ind ?? 0) < indentation);

      // - a missing successor counts as indentation 0,
      //   thus the last list item always terminates its lists.
      const upcomingIndentation = (segmentArray[idx + 1]?.ind ?? 0);

      const isTerminatingListItem =
        isListItem && (upcomingIndentation < indentation);

      // - `trim` already strips all leading/trailing whitespace
      //   including line breaks; the remaining line breaks either
      //   separate list items or become `<br/>`s.
      let markup = chars
        .trim()
        .replace(/\n/g, isListItem ? '<\/li><li>' : '<br\/>');

      if (!isBold && !isLink && !isListItem) {

        result = `${ result } <span>${ markup }<\/span> `;

      } else {
        const listTagName = String(list ?? '').toLowerCase();

        if (isOpeningListItem) {
          // - keep track of opening list tag-names
          //   by pushing the currently opened tag-name
          //   into the tracking-list for every opened list-tag.
          openTagNames.push(listTagName);

          // - re-open the preceding item (drop its closing tag)
          //   so that the new list gets nested into it.
          result = `${ result.replace(/<\/li>\s*$/, '') }<${ listTagName }>`;
        }

        if (isBold) {
          markup = `<strong>${ markup }<\/strong>`;
        }
        if (isLink) {
          markup = `<a href="${ link }">${ markup }<\/a>`;
        }
        if (isListItem) {
          markup = `<li>${ markup }<\/li>`;
        }
        result = [result, markup].join(' ');

        if (isTerminatingListItem) {
          // - close every list which is nested deeper than the
          //   upcoming segment, not just the innermost one.
          result = closeOpenLists(result, openTagNames, upcomingIndentation);
        }
      }

      if (idx >= segmentArray.length - 1) {
        // - at the time, the entire segments array has been iterated
        //   make sure to terminate every tracked, still unclosed tag,
        //   innermost first.
        result = closeOpenLists(result, openTagNames, 0);
      }
      return { openTagNames, result };
    }
    </script>
    
    <div id="test"></div>