Search code examples
javascript, html, html-escape-characters

How to escape HTML without escaping existing character entities?


I have to escape an arbitrary string and then assign it to innerHTML.

But the string may already contain character entities, like

// Sample inputs: each one mixes raw special characters with character
// entities that are already escaped (&amp;, &gt;, &quot;).
var str1 = "A &amp; B & C";
var str2 = "A &gt; B > C";
var str3 = "A&quot;B&quot; C &quot;";

And I want to get

A & B & C 
A > B > C
A"B" C " 

The question is: how can I escape a bare "&" without also escaping the "&" that begins an existing character entity?


Solution

  • First unescape the string (e.g. replace &amp; with &), and then escape it.

    Example:

    /**
     * Decode the five character entities &amp;, &lt;, &gt;, &quot; and &#039;
     * back into their literal characters. Any other text is left untouched.
     *
     * @param {string} unsafe - Text that may contain some of those entities.
     * @returns {string} The text with those entities replaced by & < > " '.
     */
    function unescapeHtml(unsafe) {
        return unsafe
             .replace(/&lt;/g, "<")
             .replace(/&gt;/g, ">")
             .replace(/&quot;/g, '"')
             .replace(/&#039;/g, "'")
             // Decode &amp; last: doing it first would turn "&amp;lt;" into
             // "&lt;" and the later passes would then wrongly decode it to "<".
             .replace(/&amp;/g, "&");
    }
    
    /**
     * Replace the characters & < > " ' with their HTML entities.
     *
     * @param {string} unsafe - Raw text.
     * @returns {string} The text with each of those five characters replaced
     *     by &amp; &lt; &gt; &quot; or &#039; respectively.
     */
    function escapeHtml(unsafe) {
        var entityFor = {
            "&": "&amp;",
            "<": "&lt;",
            ">": "&gt;",
            '"': "&quot;",
            "'": "&#039;"
        };
        // One pass over the input; each special character maps to exactly one entity.
        return unsafe.replace(/[&<>"']/g, function (ch) {
            return entityFor[ch];
        });
    }
    /**
     * Write `text` into the element with the given id via innerHTML, passing it
     * through unescapeHtml and then escapeHtml first.
     *
     * @param {string} id - The id of the target element.
     * @param {string} text - The text to display; may already contain entities.
     */
    function setText(id, text){
        var target = document.getElementById(id);
        var decoded = unescapeHtml(text);
        target.innerHTML = escapeHtml(decoded);
    }
    
    // Sample inputs mixing raw special characters with existing entities.
    var str1 = "A &amp; B & C";
    var str2 = "A &gt; B > C";
    var str3 = "A&amp;B&amp;C";
    var str4 = "A&quot;B&quot; C &quot;";

    // Render sample N into the element with id "containerN".
    [str1, str2, str3, str4].forEach(function (sample, index) {
        setText("container" + (index + 1), sample);
    });
    <div id="container1"></div>
    <div id="container2"></div>
    <div id="container3"></div>
    <div id="container4"></div>