Search code examples
pythonalgorithmcompression

Implementing the LZ78 compression algorithm in python


I've been toying around with some compression algorithms lately but, for the last couple days, I've been having some real trouble implementing LZ78 in python. I've looked around online for some examples but haven't really found anything reliable that both encodes and decodes input. Does anyone have any resources I could look at or code I could examine?

Thanks


Solution

  • Here is both compression and decompression of both :

    def compress(uncompressed):
        """Compress a string to a list of output symbols."""
    
        # Build the dictionary.
        dict_size = 256
        dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size))
        # in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
    
        w = ""
        result = []
        for c in uncompressed:
            wc = w + c
            if wc in dictionary:
                w = wc
            else:
                result.append(dictionary[w])
                # Add wc to the dictionary.
                dictionary[wc] = dict_size
                dict_size += 1
                w = c
    
        # Output the code for w.
        if w:
            result.append(dictionary[w])
        return result
    
    
    def decompress(compressed):
        """Decompress a list of output ks to a string."""
        from cStringIO import StringIO
    
        # Build the dictionary.
        dict_size = 256
        dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size))
        # in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
    
        # use StringIO, otherwise this becomes O(N^2)
        # due to string concatenation in a loop
        result = StringIO()
        w = compressed.pop(0)
        result.write(w)
        for k in compressed:
            if k in dictionary:
                entry = dictionary[k]
            elif k == dict_size:
                entry = w + w[0]
            else:
                raise ValueError('Bad compressed k: %s' % k)
            result.write(entry)
    
            # Add w+entry[0] to the dictionary.
            dictionary[dict_size] = w + entry[0]
            dict_size += 1
    
            w = entry
        return result.getvalue()
    
    
    # How to use:
    compressed = compress('TOBEORNOTTOBEORTOBEORNOT')
    print (compressed)
    decompressed = decompress(compressed)
    print (decompressed)
    

    Hope this helps