Search code examples
pythondiff

python diff SequenceMatcher - patching a list


I am trying to patch a list so that it becomes equal to another list, using the opcodes from SequenceMatcher:

# a is the source sequence, b the target; c starts as a copy of a and is
# edited in place by the loop below with the goal of making it equal to b.
a = [x for x in "qabxcd"]
b = [x for x in "abycdf"]
c = a[:]
# presumably difflib.SequenceMatcher -- the import is not shown in this snippet
s = SequenceMatcher(None, a, b)
# Each opcode says how a[i1:i2] has to change to become b[j1:j2].
for tag, i1, i2, j1, j2 in s.get_opcodes():
    print ("%7s a[%d:%d] (%s) b[%d:%d] (%s)" % 
    (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2]))
    if tag == "delete":
        # Shrinks c.  i1/i2 of the following opcodes are positions in a, so
        # from here on they no longer line up with the same elements of c.
        del c[i1:i2]
    elif tag == "replace":
        # NOTE(review): the opcode's replacement range is b[j1:j2]; this slice
        # starts and ends one element earlier than that.
        c[i1:i2] = b[j1-1:j2-1]
    elif tag == "insert":
        # i1 == i2 for an insert opcode, so this slice assignment inserts
        # b[j1:j2] at position i1 of c without removing anything.
        c[i1:i2] = b[j1:j2]
print c
print b
print c == b
# Bare comparison: the result is discarded when this runs as a script.
a == b

But the patched list is not equal to the target list:

 delete a[0:1] (['q']) b[0:0] ([])
  equal a[1:3] (['a', 'b']) b[0:2] (['a', 'b'])
replace a[3:4] (['x']) b[2:3] (['y'])
  equal a[4:6] (['c', 'd']) b[3:5] (['c', 'd'])
 insert a[6:6] ([]) b[5:6] (['f'])
['a', 'b', 'x', 'b', 'd', 'f']
['a', 'b', 'y', 'c', 'd', 'f']
False

What is the problem?


Solution

  • Every action shifts the indexes of the elements that follow it. To patch the list in place, I have to keep track of the accumulated shift:

    from difflib import SequenceMatcher

    # a is the source sequence, b the target; c is a working copy of a that is
    # patched in place, opcode by opcode, until it equals b.
    a = list("abyffgh fg99")
    b = list("999aby99ff9h9")
    c = a[:]

    s = SequenceMatcher(None, a, b)

    # The opcode indexes i1:i2 refer to the ORIGINAL a.  Every edit already
    # applied to c changes its length, so `i` accumulates that shift: the
    # elements a[i1:i2] currently live at c[i1 + i:i2 + i].
    i = 0
    for tag, i1, i2, j1, j2 in s.get_opcodes():
        # Trace line.  The c[..] label shows the unshifted indexes, while the
        # slice printed after it is taken at the shifted position.
        print("%7s a[%d:%d] (%s) b[%d:%d] (%s) c[%d:%d] (%s)" %
              (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2], i1, i2, c[i1 + i:i2 + i]))
        if tag == "delete":
            del c[i1 + i:i2 + i]
            i -= i2 - i1  # c shrank by the number of removed elements
        elif tag == "replace":
            c[i1 + i:i2 + i] = b[j1:j2]
            # c changed by (new length - old length) of the replaced run
            i += (j2 - j1) - (i2 - i1)
        elif tag == "insert":
            # i1 == i2 here, so the slice assignment is a pure insertion
            c[i1 + i:i2 + i] = b[j1:j2]
            i += j2 - j1  # c grew by the number of inserted elements
        # "equal" needs no action: that stretch of c already matches b.
        print(c)
        print(i)
    print(c)
    print(b)
    print(c == b)
    

    Output (last steps of the run):

    ['9', '9', '9', 'a', 'b', 'y', '9', '9', 'f', 'f', '9', 'h', ' ', 'f', 'g', '9', '9']
    5
     delete a[7:10] ([' ', 'f', 'g']) b[12:12] ([]) c[7:10] ([' ', 'f', 'g'])
    ['9', '9', '9', 'a', 'b', 'y', '9', '9', 'f', 'f', '9', 'h', '9', '9']
    1
      equal a[10:11] (['9']) b[12:13] (['9']) c[10:11] (['h'])
    ['9', '9', '9', 'a', 'b', 'y', '9', '9', 'f', 'f', '9', 'h', '9', '9']
    1
     delete a[11:12] (['9']) b[13:13] ([]) c[11:12] (['9'])
    ['9', '9', '9', 'a', 'b', 'y', '9', '9', 'f', 'f', '9', 'h', '9']
    -1
    ['9', '9', '9', 'a', 'b', 'y', '9', '9', 'f', 'f', '9', 'h', '9']
    ['9', '9', '9', 'a', 'b', 'y', '9', '9', 'f', 'f', '9', 'h', '9']
    True