Search code examples
javascript xml escaping

Unescape strings in Javascript


I need to unescape strings in JavaScript, but sometimes my string is already unescaped and other times it is not:

// String 1
&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; ?&gt;

// String 2
<?xml version="1.0" encoding="UTF-8"?>

I use the following method:

/**
 * Decodes HTML entities in a string by parsing it as an HTML document.
 *
 * Note: the whole input is parsed as "text/html", so literal markup in the
 * input is interpreted as markup rather than preserved as text.
 *
 * @param {string} input - Text to decode.
 * @returns {string} The text content of the parsed document's root element.
 */
function htmlDecode(input)
{
  const parser = new DOMParser();
  const { documentElement } = parser.parseFromString(input, "text/html");
  return documentElement.textContent;
}

But the problem is that when I "decode" string 2, the answer comes out as ?xml version="1.0" encoding="UTF-8"?

Help is appreciated.


Solution

  • You can use a regex to check whether the string contains escaped versions of the characters. If it does, decode it; otherwise, return the input unchanged.

    // Escaped sample: the XML declaration with <, > and " written as HTML entities.
    var string1 = '&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;';
    // Unescaped sample: the same declaration written with literal characters.
    var string2 = '<?xml version="1.0" encoding="UTF-8"?>';
    
    /**
     * Decodes HTML entities in `input`, but only when it appears to contain any.
     *
     * Strings without one of the five checked entities (&amp; &quot; &#39; &lt; &gt;)
     * are returned unchanged, so already-unescaped markup is not run through
     * the HTML parser.
     *
     * @param {string} input - Possibly-escaped text.
     * @returns {string} Decoded text, or `input` itself when no entity is found.
     */
    function decode(input) {
      // Each alternative must be the bare entity with no surrounding quote
      // characters, so that a string whose only entity is &lt; is detected too.
      const hasEntity = /&amp;|&quot;|&#39;|&lt;|&gt;/.test(input);
      if (!hasEntity) {
        return input;
      }
      const doc = new DOMParser().parseFromString(input, "text/html");
      return doc.documentElement.textContent;
    }
    
    // Run both samples through decode() and print each result in order.
    for (const sample of [string1, string2]) {
      console.log(decode(sample));
    }


    Even Simpler (and better):

    This method requires no regex and always returns the unescaped string without "over-unescaping" it:

    // Escaped sample: the XML declaration with <, > and " written as HTML entities.
    var string1 = '&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;';
    // Unescaped sample: the same declaration written with literal characters.
    var string2 = '<?xml version="1.0" encoding="UTF-8"?>';
    
    /**
     * Unescapes HTML entities by routing the string through a <textarea> element.
     *
     * The input is assigned as the element's HTML and read back through its
     * `value` property; the element is never attached to the document.
     *
     * @param {string} input - Text that may contain HTML entities.
     * @returns {string} The textarea's resulting value.
     */
    function decode(input) {
      const scratch = document.createElement("textarea");
      scratch.innerHTML = input;
      return scratch.value;
    }
    
    // Run both samples through decode() and print each result in order.
    for (const sample of [string1, string2]) {
      console.log(decode(sample));
    }