Search code examples
python-3.xsyspython-zipfilepython-importlib

Load pure Python module from in-memory zipfile


From this, I was able to make this:

import os
import types
import zipfile
import sys
import io

class ZipImporter(object):
    def __init__(self, zip_file):
        self.zfile = zip_file
        self._paths = [x.filename for x in self.zfile.filelist]
        
    def _mod_to_paths(self, fullname):
        # get the python module name
        py_filename = fullname.replace(".", os.sep) + ".py"
        # get the filename if it is a package/subpackage
        py_package = fullname.replace(".", os.sep) + "/__init__.py"
        print(py_package)
        if py_filename in self._paths:
            return py_filename
        elif py_package in self._paths:
            return py_package
        else:
            return None

    def find_module(self, fullname, path):
        if self._mod_to_paths(fullname) is not None:
            return self
        return None

    def load_module(self, fullname):
        filename = self._mod_to_paths(fullname)
        if not filename in self._paths:
            raise ImportError(fullname)
        new_module = types.ModuleType(fullname)
        new_module.__name__=fullname
        print(fullname)
        exec(self.zfile.open(filename, 'r').read(),new_module.__dict__)
        new_module.__file__ = filename
        new_module.__loader__ = self
        if filename.endswith("__init__.py"):
            new_module.__path__ = [] 
            new_module.__package__ = fullname
        else:
            new_module.__package__ = fullname.rpartition('.')[0]
        sys.modules[fullname]=new_module
        return new_module

module_zip=zipfile.ZipFile(io.BytesIO(),"w")
for key in module_dict:
    module_zip.writestr(key,module_dict[key])
sys.meta_path.append(ZipImporter(module_zip))

import pyparsing

Using the source code of pyparsing as a test. However, it fails with ImportError: attempted relative import with no known parent package. Even if I replace all the relative imports with absolute imports, it fails with RecursionError: maximum recursion depth exceeded while calling a Python object, as it tries to import pyparsing repeatedly. Is there something fundamental I'm not understanding about the way Python's import system works?


Solution

  • I found the answer --- PEP 302 says that:

    Note that the module object must be in sys.modules before the loader executes the module code. This is crucial because the module code may (directly or indirectly) import itself; adding it to sys.modules beforehand prevents unbounded recursion in the worst case and multiple loading in the best.