Currently, this is my code.
/**
 * Normalizes text to lowercase ASCII letters and digits separated by single spaces.
 *
 * The input is lowercased, then every maximal run of characters other than
 * a-z and 0-9 (punctuation, symbols, any whitespace) is replaced with exactly
 * one space. A run at the start or end of the input also becomes a single
 * space; the result is not trimmed.
 *
 * @param {string} raw_string - Text to clean.
 * @returns {string} The cleaned text.
 */
function clean_string(raw_string) {
  // Replacing each whole run of disallowed characters with one space both
  // blanks them out and guarantees no consecutive spaces, so a single pass
  // suffices and no loop state or helper function is needed.
  return raw_string.toLowerCase().replace(/[^a-z0-9]+/g, " ");
}
/**
 * Returns a copy of `str` with the character at `index` replaced by `chr`.
 *
 * When `index` is greater than the last valid index, `str` is returned as-is.
 *
 * @param {string} str - Source string.
 * @param {number} index - Zero-based position to overwrite.
 * @param {string} chr - Replacement text inserted at that position.
 * @returns {string} The resulting string.
 */
function setCharAt(str, index, chr) {
  const lastIndex = str.length - 1;
  if (index > lastIndex) {
    return str;
  }

  const head = str.substring(0, index);
  const tail = str.substring(index + 1);
  return head + chr + tail;
}
I don't know regex and it'll probably be easier with regex. Here's what I want to do:
Input: Hello, David World 123!
Output: hello david world 123
.
Input: hELlo., <>;dAVId world .;- 123
Output: hello david world 123
.
Input: He.llo David, w!orld 123#
Output: he llo david w orld 123
.
Basically, what I want to do is replace anything other than a-z and 0-9 with a space and then collapse repeated spaces into one. In other words, I only want a-z, 0-9, and single spaces in my results. How can I do that?
P.S. The code works but I think it looks bad and pretty inefficient.
EDIT: Sorry, I meant I only want lowercase letters in my output. I'm dumb.
A simple solution would be to convert all characters to lowercase, replace every run of characters that aren't a-z, 0-9, or whitespace with a space character, and then replace each run of whitespace characters with a single space character.
/**
 * Lowercases `input` and reduces everything except a-z, digits and
 * whitespace to single-space separators.
 *
 * @param {string} input - Text to sanitize.
 * @returns {string} Lowercased text in which each run of other characters
 *   and each run of whitespace has been replaced by one space (not trimmed).
 */
function sanitize(input) {
  const lowered = input.toLowerCase();

  // Step 1: each run of characters that are not a-z, a digit, or whitespace
  // becomes one space.
  const separated = lowered.replace(/([^a-z\d\s]+)/g, ' ');

  // Step 2: each run of whitespace (including spaces introduced by step 1)
  // collapses to one space.
  const collapsed = separated.replace(/(\s+)/g, ' ');

  return collapsed;
}
// Print the sanitized form of each sample input, in order.
for (const sample of [
  'Hello, David World 123!',
  'hELlo., <>;dAVId world .;- 123',
  'He.llo David, w!orld 123#',
]) {
  console.log(sanitize(sample));
}