innerHTML converts CDATA to comments

I'm trying to insert some HTML into a page using javascript, and the HTML I'm inserting contains CDATA blocks.

I'm finding, in Firefox and Chrome, that the CDATA is getting converted to a comment.

The HTML is not under my control, so it's difficult for me to avoid using CDATA.

The following test case, when there is a div on the page with id "test":

document.getElementById('test').innerHTML = '<![CDATA[foo]]> bar'

causes the following HTML to be appeded to the 'test' div:

<!--[CDATA[foo]]--> bar

Is there any way I can insert, verbatim, HTML containing CDATA into a document using javascript?

Solution

document.createCDATASection should do it, but the real answer to your question is that although HTML 5 does have CDATA sections cross-browser support for them is pretty spotty.

EDIT

The CDATA sections just aren't in the HTML 4 definition, so most browsers won't recognize them.

But it doesn't require a full DOM parser. Here's a simple lexical solution that will fix the problem.

function htmlWithCDATASectionsToHtmlWithout(html) {
    var ATTRS = "(?:[^>\"\']|\"[^\"]*\"|\'[^\']*\')*",
        // names of tags with RCDATA or CDATA content.
        SCRIPT = "[sS][cC][rR][iI][pP][tT]",
        STYLE = "[sS][tT][yY][lL][eE]",
        TEXTAREA = "[tT][eE][xX][tT][aA][rR][eE][aA]",
        TITLE = "[tT][iI][tT][lL][eE]",
        XMP = "[xX][mM][pP]",
        SPECIAL_TAG_NAME = [SCRIPT, STYLE, TEXTAREA, TITLE, XMP].join("|"),
        ANY = "[\\s\\S]*?",
        AMP = /&/g,
        LT = /</g,
        GT = />/g;
    return html.replace(new RegExp(
        // Entities and text
        "[^<]+" +
        // Comment
        "|<!--"+ANY+"-->" +
        // Regular tag
        "|<\/?(?!"+SPECIAL_TAG_NAME+")[a-zA-Z]"+ATTRS+">" +
        // Special tags
        "|<\/?"+SCRIPT  +"\\b"+ATTRS+">"+ANY+"<\/"+SCRIPT  +"\\s*>" +
        "|<\/?"+STYLE   +"\\b"+ATTRS+">"+ANY+"<\/"+STYLE   +"\\s*>" +
        "|<\/?"+TEXTAREA+"\\b"+ATTRS+">"+ANY+"<\/"+TEXTAREA+"\\s*>" +
        "|<\/?"+TITLE   +"\\b"+ATTRS+">"+ANY+"<\/"+TITLE   +"\\s*>" +
        "|<\/?"+XMP     +"\\b"+ATTRS+">"+ANY+"<\/"+XMP     +"\\s*>" +
        // CDATA section.  Content in capturing group 1.
        "|<!\\[CDATA\\[("+ANY+")\\]\\]>" +
        // A loose less-than
        "|<", "g"),

        function (token, cdataContent) {
          return "string" === typeof cdataContent
              ? cdataContent.replace(AMP, "&amp;").replace(LT, "&lt;")
                .replace(GT, "&gt;")
              : token === "<"
              ? "&lt;"  // Normalize loose less-thans.
              : token;
        });
}

Given

<b>foo</b><![CDATA[<i>bar</i>]]>

it produces

<b>foo</b>&lt;i&gt;bar&lt;/i&gt;

and given something that looks like a CDATA section inside a script or other special tag or comment, it correctly does not muck with it:

<script>/*<![CDATA[*/foo=bar<baz&amp;//]]></script><![CDATA[fish: <><]]>

becomes

<script>/*<![CDATA[*/foo=bar<baz&amp;//]]></script>fish: &lt;&gt;&lt;