Search code examples
javascripthtmlcomments

How to get the content of an HTML comment using JavaScript


How can I get the content of an HTML comment using JavaScript? I tried this:

/**
 * Removes every HTML comment delimiter from the given string.
 *
 * All occurrences of the opening delimiter are dropped first, then all
 * occurrences of the closing delimiter, regardless of whether they pair up.
 *
 * @param {string} comment - Text containing HTML comment delimiters.
 * @returns {string} The text with every opening and closing delimiter removed.
 */
function getContent(comment) {
  const withoutOpeners = comment.split("<!--").join("");
  const withoutClosers = withoutOpeners.split("-->").join("");
  return withoutClosers;
}

// Expected output: "hello world"
console.log(getContent("<!--hello world-->"));

// Expected output: "some <!--text" (the real content of the comment), but this logs "some text"
// because every "<!--" is removed, including the one inside the comment.
console.log(getContent("<!--some <!--text-->"));

But in the edge case where the comment itself contains additional <!-- sequences, those additional <!--'s get removed as well. How can I fix this?


Solution

  • Instead of replacing every delimiter independently, use a regular expression that matches <!-- followed (eventually) by --> so that only proper pairs get removed, together.

    /**
     * Unwraps HTML comments, replacing each `<!-- ... -->` pair with its content.
     *
     * The match is lazy, so each opening delimiter is paired with the nearest
     * following closing delimiter; any extra `<!--` inside a comment is kept as
     * part of the content. Text outside comments is left untouched.
     *
     * @param {string} comment - Text containing one or more HTML comments.
     * @returns {string} The text with every comment replaced by its content.
     */
    function getContent(comment) {
      // The `s` (dotAll) flag lets `.` match line terminators, so comments that
      // span several lines are unwrapped as well.
      return comment.replace(/<!--(.*?)-->/gs, "$1");
    }
    
    // Expected output: "hello world"
    console.log(getContent("<!--hello world-->"));
    
    // Expected output: "some <!--text", which is the real content of the comment
    console.log(getContent("<!--some <!--text-->"));

    Another option would be to use DOMParser to turn the text into a document, then iterate over only comments in the document and remove them, then return the content that remains in the document.

    /**
     * Strips every HTML comment from a markup string and returns what is left.
     *
     * The string is parsed into a document with DOMParser, every comment node
     * under the document body is removed, and the remaining body markup is
     * serialized back to a string.
     *
     * @param {string} comment - Markup that may contain HTML comments.
     * @returns {string} The body's markup after all comment nodes are removed.
     */
    function getContent(comment) {
      const doc = new DOMParser().parseFromString(comment, 'text/html');
      // Build the walker from the parsed document itself so the function does
      // not depend on a global `document` being present.
      const walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_COMMENT);
      // Collect the comment nodes first and remove them afterwards, so the walk
      // itself never runs over a tree that is being modified.
      const commentNodes = [];
      for (let node = walker.nextNode(); node !== null; node = walker.nextNode()) {
        commentNodes.push(node);
      }
      for (const node of commentNodes) {
        node.remove();
      }
      return doc.body.innerHTML;
    }
    
    // Comment-only inputs: once the comments are stripped, nothing should remain.
    console.log(getContent("<!--hello world-->"));
    console.log(getContent("<!--some <!--text-->"));
    // Mixed input: only the text outside the comments should remain.
    console.log(getContent("foobar <!--some <!--text--> barbaz <!-- another comment -->"));