I've been working through "The Algorithm Design Manual" section 8.2.1 Edit Distance by Recursion. In this section Skiena writes, "We can define a recursive algorithm using the observation that the last character in the string must either be matched, substituted, inserted, or deleted." That got me wondering, why the last character? This is true for any character based on the problem definition alone. The actual Levenshtein distance algorithm makes recursive calls from the back of the strings. Why? There's no reason you couldn't do the opposite, right? Is it just a simpler, more elegant syntax?
I'm flipping the algorithm around, so it iterates from the front of the string. My attempt is below. I know my implementation doesn't work completely (ex: minDistance("industry", "interest")
returns 5 instead of 6). I've spent a couple hours trying to figure out what I'm doing wrong, but I'm not seeing it. Any help would be much appreciated.
var matchChar = (c,d) => c === d ? 0 : 1;
var minDistance = function(word1, word2) {
var stringCompare = function(s, t, i, j) {
if(i === s.length) return Math.max(t.length-s.length-1,0)
if(j === t.length) return Math.max(s.length-t.length-1,0)
if(cache[i][j] !== undefined) {
return cache[i][j]
}
let match = stringCompare(s,t,i+1,j+1) + matchChar(s[i], t[j]);
let insert = stringCompare(s,t,i,j+1) + 1;
let del = stringCompare(s,t,i+1,j) + 1;
let lowestCost = Math.min(match, insert, del)
cache[i][j] = lowestCost
return lowestCost
};
let s = word1.split('')
s.push(' ')
s = s.join('')
let t = word2.split('')
t.push(' ')
t = t.join('')
var cache = []
for(let i = 0; i < s.length; i++) {
cache.push([])
for(let j = 0; j < t.length; j++) {
cache[i].push(undefined)
}
}
return stringCompare(s, t, 0, 0)
}
The lines
if(i === s.length) return Math.max(t.length-s.length-1,0)
if(j === t.length) return Math.max(s.length-t.length-1,0)
look wrong to me. I think they should be
if(i === s.length) return t.length-j
if(j === t.length) return s.length-i