Search code examples
javascript filter escaping profanity

unescape not working in profanity filter


I am trying to make a profanity filter with javascript. I was successful but when I encode the bad words I can't get it

to work. I have been working on this for two days straight.

I have tried to unescape the code in a variable and then use the variable when matching. I have tried unescaping in the

match code too. I have tried mixing in document.write and everything else I can think of.

My original functioning code:

var badwords = /fck|psssy|ssshole/i;

Baddata1 = FirstName.value;
Baddata2 = LastName.value;


if (Baddata1.match(badwords))
            {
                checker();
                FirstName.focus();
                return false;
            }
            if (Baddata2.match(badwords))
            {
                checker();
                LastName.focus();
                return false;
            }
/**
 * Shows an alert dialog asking the user to remove bad words.
 */
function checker() {
    var notice = "Please Remove Bad Words";
    window.alert(notice);
}

Solution

  • You can encode the string by subtracting each character code from 0xffff (the reverse() function below), then run the result through the same function again to get the clear text back. Use "new RegExp" to construct the pattern from the decoded string:

    // "bad|nasty|word" put through reverse() function.
    // Written with \u escapes on purpose: the encoded characters are halfwidth
    // katakana (U+FF83..U+FF9E), and Unicode compatibility normalization (which
    // can happen on copy/paste or when a page is re-rendered) rewrites them into
    // different characters that no longer decode to the word list.
    var encstr = "\uff9d\uff9e\uff9b\uff83\uff91\uff9e\uff8c\uff8b\uff86\uff83\uff88\uff90\uff8d\uff9b";
    // Decode at runtime and build a case-insensitive pattern from the result.
    var badwords = new RegExp(reverse(encstr), "i");
    // Sample inputs for the demo.
    var Baddata1 = "bad";
    var Baddata2 = "LastName";
    
    /**
     * Encodes or decodes a string by replacing every UTF-16 code unit c with
     * 0xffff - c. Applying the function twice returns the original string, so
     * the same function is used in both directions.
     *
     * @param {string} str - Text to transform.
     * @returns {string} The transformed text, or "" when str is falsy.
     */
    function reverse(str) {
        var sout = "", ix;
        if (!str) {
            return "";
        }
        for (ix = 0;  ix < str.length;  ++ix) {
            // charCodeAt yields 0..0xffff, so the difference is always a valid code unit.
            sout += String.fromCharCode(0xffff - str.charCodeAt(ix));
        }
        return sout;
    }
    
    
    if (Baddata1.match(badwords))
    {
        checker();
        FirstName.focus();
        return false;
    }
    if (Baddata2.match(badwords))
    {
        checker();
        LastName.focus();
        return false;
    }
    /**
     * Shows an alert dialog asking the user to remove bad words.
     */
    function checker() {
        var notice = "Please Remove Bad Words";
        window.alert(notice);
    }
    

    Working jsfiddle here.

    If you don't like using high character codes, I can easily substitute various encoding functions which don't, though this one is the most compact.

    Edit: To get the reversed string, either use a JS debugger to call reverse, or, add temporary code like this:

    // Temporary debugging aid: logs the output of reverse() for the plain word list.
    console.log(reverse("bad|nasty|word"));
    

    This works because reverse(reverse(string1)) === string1. reverse undoes itself.

    You could also keep a list of words in a separate script, and use JS string join passed to reverse to make the list, for example:

    // Plain word list and its "|"-separated form (the regex alternation source).
    var wordlist = ["bad", "nasty", "word"],
        joined = wordlist.join("|");
    // Log a declaration line containing the encoded list, for copying from the console.
    console.log(['var encstr = "', reverse(joined), '"'].join(""));
    

    Once you've copied the string from the debug console and pasted it, the separate script could easily check that it's correct:

    // Encoded form of "bad|nasty|word", written with \u escapes: the characters
    // are halfwidth katakana (U+FF83..U+FF9E), which Unicode compatibility
    // normalization rewrites into different characters that would make this
    // check always report a mismatch.
    var encstr = "\uff9d\uff9e\uff9b\uff83\uff91\uff9e\uff8c\uff8b\uff86\uff83\uff88\uff90\uff8d\uff9b";
    // Decode the pasted literal and compare it with the plain joined word list.
    alert("encstr " + (reverse(encstr) === joined ? "matches" : "does NOT match") + " original");
    

    Edit 2: If you don't want to use high char codes that fall into international ranges, just use an encoding like base64, or this simple set:

    /**
     * Encodes a string as a comma-separated list of its UTF-16 code units,
     * each written in lowercase hexadecimal (for example "bad" -> "62,61,64").
     *
     * @param {string} str - Text to encode.
     * @returns {string} The encoded list, or "" when str is falsy.
     */
    function encodeStr(str) {
        var hexCodes = [], pos = 0;
        if (!str) {
            return "";
        }
        while (pos < str.length) {
            hexCodes.push(str.charCodeAt(pos).toString(16));
            pos += 1;
        }
        return hexCodes.join(",");
    }
    
    /**
     * Decodes a comma-separated list of hexadecimal character codes back into
     * a string (for example "62,61,64" -> "bad").
     *
     * @param {string} str - Comma-separated hex codes.
     * @returns {string} The decoded text, or "" when str is falsy.
     */
    function decodeStr(str) {
        if (!str) {
            return "";
        }
        return str.split(",").map(function (hex) {
            return String.fromCharCode(parseInt(hex, 16));
        }).join("");
    }
    
    // encodeStr("bad|nasty|word") yields the hex list below; it is decoded at
    // runtime and compiled into a case-insensitive pattern.
    var encstr = "62,61,64,7c,6e,61,73,74,79,7c,77,6f,72,64",
        badwords = new RegExp(decodeStr(encstr), "i");