Search code examples
javascriptutf-32

Cleaning string in JS from UTF-32 characters


I need to remove characters that lie outside the Basic Multilingual Plane (code points above U+FFFF, such as "💣") from a string in JavaScript. I tried this code:

str.replace(/[^\u0000-\uFFFF]/gi, '')

But it doesn't work.


Solution

  • To clean the message I used

    /**
     * Returns the Unicode code point of the character that the UTF-16 code
     * unit at `idx` belongs to.
     *
     * When `idx` points at either half of a well-formed surrogate pair, the
     * combined code point (0x10000 or above) is returned for both halves.
     * An unpaired surrogate is returned as its own code unit value instead
     * of being combined with an unrelated neighbour.
     *
     * @param {string} str - String to read from.
     * @param {number} idx - Index of a UTF-16 code unit in `str`.
     * @returns {number} The code point, or NaN when `idx` is out of range
     *   (the value `charCodeAt` gives for such an index).
     */
    function fixedCharCodeAt(str, idx) {
      var code = str.charCodeAt(idx);
      var partner;
      if (0xD800 <= code && code <= 0xDBFF) {
        // High surrogate: combine only if a low surrogate really follows.
        // At the end of the string `partner` is NaN and both comparisons
        // are false, so the lone surrogate is returned unchanged.
        partner = str.charCodeAt(idx + 1);
        if (0xDC00 <= partner && partner <= 0xDFFF) {
          return ((code - 0xD800) * 0x400) + (partner - 0xDC00) + 0x10000;
        }
        return code;
      }
      if (0xDC00 <= code && code <= 0xDFFF) {
        // Low surrogate: combine only if a high surrogate really precedes it.
        partner = str.charCodeAt(idx - 1);
        if (0xD800 <= partner && partner <= 0xDBFF) {
          return ((partner - 0xD800) * 0x400) + (code - 0xDC00) + 0x10000;
        }
        return code;
      }
      return code;
    }
    

    and

    /**
     * Removes every character outside the Basic Multilingual Plane from a
     * string.
     *
     * JavaScript strings are sequences of UTF-16 code units, and a character
     * above U+FFFF is stored as a surrogate pair (two code units in the range
     * 0xD800-0xDFFF). Deleting all surrogate code units therefore removes
     * those characters. Unpaired surrogates are removed as well, so the
     * result never contains a surrogate code unit. Every other code unit,
     * including U+FFFF, is kept.
     *
     * @param {string} message_old - Text to clean.
     * @returns {string} A copy of `message_old` without surrogate code units.
     */
    function cleaningMsgFromBreakingSymb(message_old) {
      // No `u` flag on purpose: the class must match single UTF-16 code units.
      return message_old.replace(/[\uD800-\uDFFF]/g, "");
    }