I've been toying around with some compression algorithms lately but, for the last couple days, I've been having some real trouble implementing LZ78 in python. I've looked around online for some examples but haven't really found anything reliable that both encodes and decodes input. Does anyone have any resources I could look at or code I could examine?
Thanks
Here is both compression and decompression of both :
def compress(uncompressed):
"""Compress a string to a list of output symbols."""
# Build the dictionary.
dict_size = 256
dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size))
# in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
w = ""
result = []
for c in uncompressed:
wc = w + c
if wc in dictionary:
w = wc
else:
result.append(dictionary[w])
# Add wc to the dictionary.
dictionary[wc] = dict_size
dict_size += 1
w = c
# Output the code for w.
if w:
result.append(dictionary[w])
return result
def decompress(compressed):
"""Decompress a list of output ks to a string."""
from cStringIO import StringIO
# Build the dictionary.
dict_size = 256
dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size))
# in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
# use StringIO, otherwise this becomes O(N^2)
# due to string concatenation in a loop
result = StringIO()
w = compressed.pop(0)
result.write(w)
for k in compressed:
if k in dictionary:
entry = dictionary[k]
elif k == dict_size:
entry = w + w[0]
else:
raise ValueError('Bad compressed k: %s' % k)
result.write(entry)
# Add w+entry[0] to the dictionary.
dictionary[dict_size] = w + entry[0]
dict_size += 1
w = entry
return result.getvalue()
# How to use:
compressed = compress('TOBEORNOTTOBEORTOBEORNOT')
print (compressed)
decompressed = decompress(compressed)
print (decompressed)
Hope this helps