Search code examples
python twisted

Zipping files in Twisted without blocking.


Is there a way to zip files without blocking in Twisted?

import zipfile
from twisted.internet import defer
from twisted.internet import reactor

def zip_files(file_list, path, output_Zip):
    """Write every file in ``file_list`` into a new zip archive.

    Args:
        file_list: Iterable of paths of the files to add to the archive.
        path: Not used by this function; kept so the signature is unchanged.
        output_Zip: Path of the archive to create (opened in ``'w'`` mode).

    Returns:
        True when all files were added and the archive was closed,
        False when adding a file or closing the archive raised an error.

    Raises:
        Whatever ``zipfile.ZipFile`` raises if the archive cannot be opened.
    """
    # The parameter is spelled ``output_Zip``; referring to ``output_zip``
    # here would raise NameError on every call.
    zip_handle = zipfile.ZipFile(output_Zip, mode='w', allowZip64=True)
    try:
        # The context manager closes the archive even when a write fails,
        # so the file handle is not leaked on the error path.
        with zip_handle:
            for file_name in file_list:
                zip_handle.write(file_name)
    except Exception:
        # Deliberate best effort: callers only get a success flag back.
        return False
    return True
def print_zip(res):
    """Print ``res`` and return it unchanged.

    Returning the value lets this be used as a pass-through Deferred
    callback: later callbacks still receive the same result.
    """
    # The function-call form prints the same text on Python 2 and is also
    # valid syntax on Python 3, unlike the ``print res`` statement.
    print(res)
    return res


file_list = ['path_to_file1','path_to_file2']
output_path = 'full_path_to_output_zip'
d = defer.Deferred()
# zip_files takes (file_list, path, output_Zip); ``path`` is not used by it,
# so pass None there and give the archive path as ``output_Zip``.
# NOTE: this callback runs in the reactor thread, so the reactor is blocked
# for as long as the zipping takes.
d.addCallback(lambda _: zip_files(file_list, None, output_path))
d.addCallback(print_zip)
zip_result = d
# A Deferred only runs its callbacks once it is fired; schedule that for
# when the reactor has started.
reactor.callWhenRunning(d.callback, None)
reactor.run()

I have this so far. While it does work, triggering the zipping process causes Twisted to block and wait until the initial 'zip job' is finished. I'd rather it terminate the existing 'zip job' and start the new one.


Solution

  • Perhaps something like this, using a DeferredList of deferToThreads to not block on writing the zip files:

    import zipfile
    import logging
    from twisted.internet import threads, defer
    from twisted.internet import reactor
    
    # Root logger, emitting INFO and above through a stream handler
    # (logging.StreamHandler() with no argument writes to stderr).
    log = logging.getLogger()
    log.setLevel(logging.INFO)
    log.addHandler(logging.StreamHandler())
    
    
    def zip_file(input_path, output_path):
        """Create a zip archive at ``output_path`` holding the file ``input_path``.

        The archive is opened in write mode with Zip64 allowed and is closed
        when the ``with`` block exits, whether or not the write succeeded.
        """
        archive = zipfile.ZipFile(output_path, mode='w', allowZip64=True)
        with archive:
            archive.write(input_path)
    
    
    def log_failure(err):
        """Errback that logs ``err`` at ERROR level.

        Nothing is returned, so a Deferred using this as its errback
        continues with a ``None`` result instead of the failure.

        Args:
            err: The failure passed to the errback; it is formatted with
                ``%s`` into the log message.
        """
        # log.exception() is meant to be called from inside an ``except``
        # block. An errback runs outside one, so there is no active exception
        # and logging would append a bogus "NoneType: None" traceback.
        log.error("error: %s", err)
    
    
    def zip_file_and_catch_error(input_path, output_path):
        """Run ``zip_file`` via ``threads.deferToThread`` and log any failure.

        Returns the Deferred from ``deferToThread`` with ``log_failure``
        attached as its errback.
        """
        # addErrback returns the Deferred it was called on, so the chained
        # call hands back the same object deferToThread produced.
        return threads.deferToThread(
            zip_file, input_path, output_path
        ).addErrback(log_failure)
    
    
    def main():
        """Zip each input file into its own archive, log the results, stop the reactor.

        One ``zip_file_and_catch_error`` job is started per (input, output)
        pair. The jobs are gathered in a DeferredList whose result is logged;
        the reactor is then stopped whether that succeeded or failed.
        """
        input_paths = ['path_to_file1','path_to_file2']
        output_paths = ['path_out1','path_out2']
        assert len(input_paths) == len(output_paths)

        def report(result):
            log.info("result: %s", result)

        def shut_down(_):
            # Stop via callLater rather than calling reactor.stop directly.
            return reactor.callLater(0, reactor.stop)

        jobs = []
        for source, target in zip(input_paths, output_paths):
            jobs.append(zip_file_and_catch_error(source, target))

        dl = defer.DeferredList(jobs)
        dl.addCallback(report)
        dl.addBoth(shut_down)
        reactor.run()
    
    
    # Run the example only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == "__main__":
        main()