Search code examples
javascripthtmlregexhtml-escape-characters

Escape custom tags except html tags


I want to escape custom tags except for HTML tags such as strong, bold, italic.

 Input: "Hello World! <notification>Name</notification><nat>Nat tag</nat> <strong>This should be strong</strong><nas>Nas Tag</nas>"

Output: Hello World! <notification>Name</notification> <nat>Nat tag</nat>**This should be strong**<nas> Nas Tag</nas>

string.replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;")
.replace(/<(?!/?strong>)[^>]+>/g, '')

I tried the replace chain above, but it also turns <strong> into &lt;strong&gt;. Any help would be appreciated.


Solution

  • Better to have a whitelist of allowed tags and "escape" anything that isn't in the list. Something like this will work for a simple implementation, but in general, regex is not a good tool for parsing HTML.

    // Demo: escape every tag except <strong> in a sample string and print the result.
    const allowedTags = ['strong'];
    const sampleMarkup = "Hello World! <notification asdfasd=asd>Name</notification><nat>Nat tag</nat> <strong>This should be strong</strong><nas>Nas Tag</nas>";

    console.log(escapeCustomTags(sampleMarkup, allowedTags));
    
    /**
     * Escape every HTML-like tag in `input` whose name is not whitelisted.
     *
     * A "tag" is anything of the form `<name ...>` or `</name ...>`. Tags whose
     * name (compared case-insensitively) appears in `allowed_tags` are left
     * untouched; in every other tag each `<` becomes `&lt;` and each `>` becomes
     * `&gt;`. Text outside tags is never modified.
     *
     * This is a regex-based helper, not an HTML parser or sanitizer: attributes
     * of allowed tags are passed through as-is.
     *
     * @param {string} input - Text that may contain tags. A non-string value is
     *   returned unchanged.
     * @param {string[]} [allowed_tags=[]] - Tag names to keep, e.g. `['strong']`.
     * @returns {string} `input` with all non-whitelisted tags escaped.
     */
    function escapeCustomTags(input, allowed_tags = []) {
      if (typeof input !== 'string') {
        return input;
      }

      // Normalise the whitelist once; a Set gives constant-time membership checks.
      const allowedNames = new Set(allowed_tags.map((name) => name.toLowerCase()));

      // Opening or closing tag. Group 1 captures the WHOLE tag name (everything up
      // to the first whitespace, "/" or ">"), so "<strong-x>" is not mistaken for
      // "<strong>". The optional attribute part must start with whitespace or "/",
      // which keeps the two quantified parts from overlapping.
      const tagPattern = /<\/?([^\s\/>]*)(?:[\s\/][^>]*)?>/g;

      // Rewrite each tag at the position where it was matched. A callback is used
      // so that "$&", "$1", ... inside a tag are not treated as replacement
      // patterns, and so that an identical string elsewhere cannot be hit instead.
      return input.replace(tagPattern, (tag, name) => {
        if (allowedNames.has(name.toLowerCase())) {
          return tag;
        }
        return tag.replace(/</g, '&lt;').replace(/>/g, '&gt;');
      });
    }