I am trying to make a profanity filter with JavaScript. It works when the bad words are written in plain text, but when I encode the bad words I can't get the matching
to work. I have been working on this for two days straight.
I have tried to unescape the code in a variable and then use the variable when matching. I have tried unescaping in the
match code too. I have tried mixing in document.write and everything else I can think of.
My original functioning code:
// Case-insensitive pattern that matches if any of the listed words occurs anywhere in the input.
var badwords = /fck|psssy|ssshole/i;
// Read both field values before doing any checks.
// NOTE(review): these are assigned without var/let/const, so in non-strict code they become
// implicit globals. FirstName and LastName are not defined in this snippet — presumably the
// form's input elements; confirm against the page.
Baddata1 = FirstName.value;
Baddata2 = LastName.value;
// match() returns null (falsy) when the pattern does not occur in the string.
if (Baddata1.match(badwords))
{
// Show the warning, then move the cursor to the offending field.
checker();
FirstName.focus();
// NOTE(review): a bare return means this code runs inside a function that is not shown here —
// presumably a validation/submit handler where false cancels the action; confirm.
return false;
}
// Same check for the last name; only reached when the first name passed.
if (Baddata2.match(badwords))
{
checker();
LastName.focus();
return false;
}
/**
 * Warn the user, via a blocking alert dialog, that the input contains a
 * filtered word. Takes no arguments and returns nothing.
 */
function checker() {
  var notice = "Please Remove Bad Words";
  window.alert(notice);
}
You can encode the string by subtracting each character code from 0xffff; running the encoded string through the same function again gives back the clear text. Use "new RegExp" to construct the pattern from the decoded string:
// "bad|nasty|word" put through reverse(). The encoded characters are halfwidth
// katakana code units (U+FF83..U+FF9E). They are written as \u escapes because a
// raw literal is silently rewritten to different characters whenever the text is
// Unicode-normalized (NFKC/NFKD), after which it no longer decodes to the word list.
var encstr = "\uff9d\uff9e\uff9b\uff83\uff91\uff9e\uff8c\uff8b\uff86\uff83\uff88\uff90\uff8d\uff9b";
// Decode at runtime and build a case-insensitive pattern from the clear text.
var badwords = new RegExp(reverse(encstr), "i");
// Fixed sample inputs used by the checks that follow.
var Baddata1 = "bad";
var Baddata2 = "LastName";

/**
 * Encode or decode a string by replacing every UTF-16 code unit c with
 * 0xffff - c. Applying the function twice returns the original string, so the
 * same function serves as both encoder and decoder.
 *
 * @param {string} str - Text to transform.
 * @returns {string} The transformed text, or "" when str is empty or otherwise falsy.
 */
function reverse(str) {
  var sout = "", ix;
  if (!str) {
    return "";
  }
  for (ix = 0; ix < str.length; ++ix) {
    // 0xffff - code stays within 0..0xffff, so each unit maps to exactly one unit.
    sout += String.fromCharCode(0xffff - str.charCodeAt(ix));
  }
  return sout;
}
// Reject the first value if it contains any word from the decoded pattern.
// match() returns null (falsy) when the pattern does not occur in the string.
if (Baddata1.match(badwords))
{
// Show the warning, then move the cursor to the offending field.
checker();
// NOTE(review): FirstName is not defined in this snippet — presumably the form's
// first-name input element; confirm against the page.
FirstName.focus();
// NOTE(review): a bare return means this code runs inside a function that is not shown here —
// presumably a validation/submit handler where false cancels the action; confirm.
return false;
}
// Same check for the second value; only reached when the first one passed.
if (Baddata2.match(badwords))
{
checker();
LastName.focus();
return false;
}
/**
 * Pop up a blocking alert asking the user to remove the filtered words.
 * Takes no arguments and returns nothing.
 */
function checker() {
  var alertText = "Please Remove Bad Words";
  window.alert(alertText);
}
Working jsfiddle here.
If you don't like using high character codes, I can easily substitute various encoding functions which don't, though this one is the most compact.
Edit: To get the encoded string, either call reverse from a JS debugger or add temporary code like this:
// Temporary helper: prints the encoded form of the clear-text word list so it can be
// copied into the encstr literal. Remove once the encoded string has been captured.
console.log(reverse("bad|nasty|word"));
This works because reverse(reverse(string1)) === string1; in other words, reverse is its own inverse, so the same function both encodes and decodes.
You could also keep a list of words in a separate script, and use JS string join passed to reverse to make the list, for example:
// Clear-text words, kept in a separate script that is not shipped with the page.
var wordlist = ["bad", "nasty", "word"];
// Alternation source for the regular expression: "bad|nasty|word".
var joined = wordlist.join("|");
// Print a ready-to-paste declaration holding the encoded word list.
console.log(['var encstr = "', reverse(joined), '"'].join(""));
Once you've copied the string from the debug console and pasted it, the separate script could easily check that it's correct:
// reverse("bad|nasty|word"). The encoded characters are halfwidth katakana code
// units (U+FF83..U+FF9E). They are written as \u escapes because a raw literal is
// silently changed to different characters if the text is ever Unicode-normalized
// (NFKC/NFKD), and the comparison below would then always report a mismatch.
var encstr = "\uff9d\uff9e\uff9b\uff83\uff91\uff9e\uff8c\uff8b\uff86\uff83\uff88\uff90\uff8d\uff9b";
// Round-trip check: decoding the pasted literal must give back the joined word list.
alert("encstr " + (reverse(encstr) === joined ? "matches" : "does NOT match") + " original");
Edit 2: If you don't want to use high char codes that fall into international ranges, just use an encoding like base64, or this simple set:
/**
 * Encode a string as a comma-separated list of the lowercase hexadecimal
 * values of its UTF-16 code units, e.g. "bad" -> "62,61,64".
 *
 * @param {string} str - Text to encode.
 * @returns {string} The encoded list, or "" when str is empty or otherwise falsy.
 */
function encodeStr(str) {
  var hexCodes = [];
  var pos = 0;
  if (!str) {
    return "";
  }
  while (pos < str.length) {
    hexCodes.push(str.charCodeAt(pos).toString(16));
    pos += 1;
  }
  return hexCodes.join(",");
}
/**
 * Decode a comma-separated list of hexadecimal code-unit values back into a
 * string, e.g. "62,61,64" -> "bad".
 *
 * @param {string} str - Comma-separated hex values.
 * @returns {string} The decoded text, or "" when str is empty or otherwise falsy.
 */
function decodeStr(str) {
  if (!str) {
    return "";
  }
  return str
    .split(",")
    .map(function (hexToken) {
      // Wrapped in a function so parseInt always receives radix 16
      // rather than the index map() passes as its second argument.
      return String.fromCharCode(parseInt(hexToken, 16));
    })
    .join("");
}
// Using encodeStr on "bad|nasty|word" makes this:
// a comma-separated list of hex character codes (62 = "b", 61 = "a", 64 = "d", 7c = "|", ...).
var encstr = "62,61,64,7c,6e,61,73,74,79,7c,77,6f,72,64";
// Decode back to clear text at runtime and build a case-insensitive pattern from it.
var badwords = new RegExp(decodeStr(encstr), "i");