I'm working on a game where I only need to check if there's a distance of 0 or 1 between two words and return true if that's the case. I found a general purpose levenshtein distance algorithm:
function levenshtein(s, t) {
if (s === t) { return 0; }
var n = s.length, m = t.length;
if (n === 0 || m === 0) { return n + m; }
var x = 0, y, a, b, c, d, g, h, k;
var p = new Array(n);
for (y = 0; y < n;) { p[y] = ++y; }
for (;
(x + 3) < m; x += 4) {
var e1 = t.charCodeAt(x);
var e2 = t.charCodeAt(x + 1);
var e3 = t.charCodeAt(x + 2);
var e4 = t.charCodeAt(x + 3);
c = x; b = x + 1; d = x + 2; g = x + 3; h = x + 4;
for (y = 0; y < n; y++) {
k = s.charCodeAt(y);
a = p[y];
if (a < c || b < c) { c = (a > b ? b + 1 : a + 1); }
else { if (e1 !== k) { c++; } }
if (c < b || d < b) { b = (c > d ? d + 1 : c + 1); }
else { if (e2 !== k) { b++; } }
if (b < d || g < d) { d = (b > g ? g + 1 : b + 1); }
else { if (e3 !== k) { d++; } }
if (d < g || h < g) { g = (d > h ? h + 1 : d + 1); }
else { if (e4 !== k) { g++; } }
p[y] = h = g; g = d; d = b; b = c; c = a;
}
}
for (; x < m;) {
var e = t.charCodeAt(x);
c = x;
d = ++x;
for (y = 0; y < n; y++) {
a = p[y];
if (a < c || d < c) { d = (a > d ? d + 1 : a + 1); }
else {
if (e !== s.charCodeAt(y)) { d = c + 1; }
else { d = c; }
}
p[y] = d;
c = a;
}
h = d;
}
return h;
}
Which works, but this spot is going to be a hotspot and be run potentially hundreds of thousands of times a second and I want to optimize it because I don't need a general purpose algorithm, just one that checks if there's a distance of 0 or 1.
I tried writing it and came up with this:
function closeGuess(guess, word) {
if (Math.abs(word.length - guess.length) > 1) { return false; }
var errors = 0, guessIndex = 0, wordIndex = 0;
while (guessIndex < guess.length || wordIndex < word.length) {
if (errors > 1) { return false; }
if (guess[guessIndex] !== word[wordIndex]) {
if (guess.length < word.length) { wordIndex++; }
else { guessIndex++; }
errors++;
} else {
wordIndex++;
guessIndex++;
}
}
return true;
}
But after profiling it I found that my code was twice as slow, which surprised me because I think the general purpose algorithm is O(n*m) and I think mine is O(n).
I've been testing the performance difference on this fiddle: https://jsfiddle.net/aubtze2L/3/
Are there any better algorithms I can use or any way I can optimize my code to be faster?
I don't see a more elegant way which is at the same time faster than the good old for-loop:
function lev01(a, b) {
let la = a.length;
let lb = b.length;
let d = 0;
switch (la - lb) {
case 0: // mutation
for (let i = 0; i < la; ++i) {
if (a.charAt(i) != b.charAt(i) && ++d > 1) {
return false;
}
}
return true;
case -1: // insertion
for (let i = 0; i < la + d; ++i) {
if (a.charAt(i - d) != b.charAt(i) && ++d > 1) {
return false;
}
}
return true;
case +1: // deletion
for (let i = 0; i < lb + d; ++i) {
if (a.charAt(i) != b.charAt(i - d) && ++d > 1) {
return false;
}
}
return true;
}
return false;
}
console.log(lev01("abc", "abc"));
console.log(lev01("abc", "abd"));
console.log(lev01("abc", "ab"));
console.log(lev01("abc", "abcd"));
console.log(lev01("abc", "cba"));
Performance comparison (Chrome):