Search code examples
javascriptalgorithmoptimizationtime-complexitylevenshtein-distance

How to optimize levenshtein distance for checking for a distance of 1?


I'm working on a game where I only need to check if there's a distance of 0 or 1 between two words and return true if that's the case. I found a general purpose levenshtein distance algorithm:

function levenshtein(s, t) {
  if (s === t) { return 0; }
  var n = s.length, m = t.length;
  if (n === 0 || m === 0) { return n + m; }
  var x = 0, y, a, b, c, d, g, h, k;
  var p = new Array(n);
  for (y = 0; y < n;) { p[y] = ++y; }
  for (;
    (x + 3) < m; x += 4) {
    var e1 = t.charCodeAt(x);
    var e2 = t.charCodeAt(x + 1);
    var e3 = t.charCodeAt(x + 2);
    var e4 = t.charCodeAt(x + 3);
    c = x; b = x + 1; d = x + 2; g = x + 3; h = x + 4;

    for (y = 0; y < n; y++) {
      k = s.charCodeAt(y);
      a = p[y];

      if (a < c || b < c) { c = (a > b ? b + 1 : a + 1); }
      else { if (e1 !== k) { c++; } }

      if (c < b || d < b) { b = (c > d ? d + 1 : c + 1); } 
      else { if (e2 !== k) { b++; } }

      if (b < d || g < d) { d = (b > g ? g + 1 : b + 1); } 
      else { if (e3 !== k) { d++; } }

      if (d < g || h < g) { g = (d > h ? h + 1 : d + 1); }
      else { if (e4 !== k) { g++; } }

      p[y] = h = g; g = d; d = b; b = c; c = a;
    }
  }

  for (; x < m;) {
    var e = t.charCodeAt(x);
    c = x;
    d = ++x;
    for (y = 0; y < n; y++) {
      a = p[y];
      if (a < c || d < c) { d = (a > d ? d + 1 : a + 1); } 
      else {
        if (e !== s.charCodeAt(y)) { d = c + 1; }
        else { d = c; }
      }
      p[y] = d;
      c = a;
    }
    h = d;
  }

  return h;
}

Which works, but this spot is going to be a hotspot and be run potentially hundreds of thousands of times a second and I want to optimize it because I don't need a general purpose algorithm, just one that checks if there's a distance of 0 or 1.

I tried writing it and came up with this:

function closeGuess(guess, word) {
  if (Math.abs(word.length - guess.length) > 1) { return false; }

  var errors = 0, guessIndex = 0, wordIndex = 0;

  while (guessIndex < guess.length || wordIndex < word.length) {
    if (errors > 1) { return false; }
    if (guess[guessIndex] !== word[wordIndex]) {
      if (guess.length < word.length) { wordIndex++; }
      else { guessIndex++; }
      errors++;
    } else {
      wordIndex++;
      guessIndex++;
    }
  }

  return true;
}

But after profiling it I found that my code was twice as slow, which surprised me because I think the general purpose algorithm is O(n*m) and I think mine is O(n).

I've been testing the performance difference on this fiddle: https://jsfiddle.net/aubtze2L/3/

Are there any better algorithms I can use or any way I can optimize my code to be faster?


Solution

  • I don't see a more elegant way which is at the same time faster than the good old for-loop:

    function lev01(a, b) {
      let la = a.length;
      let lb = b.length;
      let d = 0;
      switch (la - lb) {
        case 0: // mutation
          for (let i = 0; i < la; ++i) {
            if (a.charAt(i) != b.charAt(i) && ++d > 1) {
              return false;
            }
          }
          return true;
        case -1: // insertion
          for (let i = 0; i < la + d; ++i) {
            if (a.charAt(i - d) != b.charAt(i) && ++d > 1) {
              return false;
            }
          }
          return true;
        case +1: // deletion
          for (let i = 0; i < lb + d; ++i) {
            if (a.charAt(i) != b.charAt(i - d) && ++d > 1) {
              return false;
            }
          }
          return true;
      }
      return false;
    }
    
    console.log(lev01("abc", "abc"));
    console.log(lev01("abc", "abd"));
    console.log(lev01("abc", "ab"));
    console.log(lev01("abc", "abcd"));
    console.log(lev01("abc", "cba"));

    Performance comparison (Chrome):

    • 80.33ms - lev01 (this answer)
    • 234.84ms - lev
    • 708.12ms - close