Search code examples
pythonjsonschemapydantic

Pydantic model for JSON Meta Schema


I have a use case where the user needs to define some JSON Schema for later use. Right now I am using Pydantic to parse user configs and check that they are valid.

Does anyone know if there is a library that provides a Pydantic model for the JSON Meta Schema? Even better would be one that can parse a JSON Schema into a Pydantic model at runtime.


Solution

  • One solution is to hack the utils out of datamodel-code-generator, specifically their JsonSchemaParser. This generates an intermediate text representation of all pydantic models which you can then dynamically import. You might reasonably balk at this, but it does allow for self-referencing and multi-model setups at least:

    import importlib.util
    import json
    import re
    import sys
    from contextlib import contextmanager
    from pathlib import Path
    from tempfile import NamedTemporaryFile
    from types import ModuleType
    
    from datamodel_code_generator.parser.jsonschema import JsonSchemaParser
    from pydantic import BaseModel
    
    
    # Matches a run of one or more characters that are not ASCII letters or digits;
    # used both to detect and to split on word separators in a title.
    NON_ALPHANUMERIC = re.compile(r"[^a-zA-Z0-9]+")
    # An uppercase ASCII letter followed by at least one ASCII letter or digit.
    # The pattern has no end anchor and needs two or more characters to match.
    UPPER_CAMEL_CASE = re.compile(r"[A-Z][a-zA-Z0-9]+")
    # Same as above, but starting with a lowercase ASCII letter.
    LOWER_CAMEL_CASE = re.compile(r"[a-z][a-zA-Z0-9]+")
    
    class BadJsonSchema(Exception):
        """Raised when a schema title cannot be converted into a model class name."""
    
    
    def _to_camel_case(name: str) -> str:
        """Convert a schema title into an UpperCamelCase identifier.

        The result is used to look up the generated model class by name, so it
        is intended to mirror the class name derived from the schema title.

        Args:
            name: The title to convert, e.g. ``"my model"``, ``"myModel"`` or
                ``"MyModel"``.

        Returns:
            The title with separators removed and each word capitalised, or the
            title with its first letter upper-cased when it has no separators.

        Raises:
            BadJsonSchema: If the title has no separators and does not start
                with an ASCII letter (this includes the empty string).
        """
        if NON_ALPHANUMERIC.search(name):
            # Words are split on any run of non-alphanumeric characters and
            # re-joined. Note that ``str.title`` also capitalises a letter that
            # follows a digit ("foo2bar" -> "Foo2Bar").
            return "".join(term.lower().title() for term in NON_ALPHANUMERIC.split(name))

        # From here on ``name`` holds only ASCII letters and digits. Both
        # camel-case patterns require at least two characters, so a one-letter
        # title such as "A" is accepted explicitly instead of being rejected.
        is_single_letter = len(name) == 1 and name.isalpha()
        if is_single_letter or UPPER_CAMEL_CASE.match(name) or LOWER_CAMEL_CASE.match(name):
            # A no-op for names that already start with an uppercase letter.
            return name[0].upper() + name[1:]

        raise BadJsonSchema(f"Unknown case used for {name}")
    
    
    def _load_module_from_file(file_path: Path) -> ModuleType:
        """Import the Python source file at ``file_path`` as a new module.

        The module is registered in ``sys.modules`` under the file's stem before
        its code is executed, and stays registered after a successful load.

        Args:
            file_path: Path of the source file to import.

        Returns:
            The executed module object.

        Raises:
            ImportError: If no import spec or loader can be created for the file
                (for example because its suffix is not a recognised source suffix).
            Exception: Anything raised while executing the module's code is
                propagated unchanged.
        """
        module_name = file_path.stem
        spec = importlib.util.spec_from_file_location(
            name=module_name, location=str(file_path)
        )
        # spec_from_file_location returns None when it cannot pick a loader;
        # fail with a clear ImportError rather than an AttributeError further down.
        if spec is None or spec.loader is None:
            raise ImportError(
                f"Cannot create an import spec for {file_path}", path=str(file_path)
            )

        module = importlib.util.module_from_spec(spec)
        # Register before executing, as the regular import machinery does.
        sys.modules[module_name] = module
        try:
            spec.loader.exec_module(module)
        except BaseException:
            # Do not leave a half-initialised module behind when execution fails.
            sys.modules.pop(module_name, None)
            raise
        return module
    
    
    @contextmanager
    def _delete_file_on_completion(file_path: Path):
        """Remove ``file_path`` once the enclosed ``with`` block has finished.

        The file is unlinked whether the block exits normally or by raising.
        Nothing is yielded to the ``with`` statement.
        """
        try:
            yield None
        finally:
            # missing_ok=True: a file that is already gone is not an error.
            file_path.unlink(missing_ok=True)
    
    
    def json_schema_to_pydantic_model(json_schema: dict, name_override: str) -> "type[BaseModel]":
        """Build a pydantic model class from a JSON schema at runtime.

        The schema is turned into Python source by ``JsonSchemaParser``, written
        to a temporary ``.py`` file, imported as a module, and the class whose
        name is derived from the schema's ``"title"`` is returned.

        Args:
            json_schema: The JSON schema as a dict. It must contain a ``"title"``
                key and be serialisable with ``json.dumps``.
            name_override: Name assigned to the returned class's ``__name__``.

        Returns:
            The generated model class (a class, not an instance).

        Raises:
            KeyError: If ``json_schema`` has no ``"title"``, or the imported
                module has no attribute with the derived class name.
            BadJsonSchema: If the title cannot be converted to a class name.
        """
        # Resolve the class name first so a schema without a usable title fails
        # before any code is generated, written to disk or imported.
        main_model_name = _to_camel_case(name=json_schema["title"])

        json_schema_as_str = json.dumps(json_schema)
        # NOTE(review): assumes parse() returns a single source string here - confirm
        # for schemas that would produce several modules.
        pydantic_models_as_str: str = JsonSchemaParser(json_schema_as_str).parse()

        # Only reserve a unique path here; delete=False keeps the file on disk
        # after it is closed so it can be written and imported by path below.
        with NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
            temp_file_path = Path(temp_file.name).resolve()

        # The cleanup now covers the write as well as the import, so the
        # temporary file is removed even if writing it fails.
        with _delete_file_on_completion(file_path=temp_file_path):
            temp_file_path.write_bytes(pydantic_models_as_str.encode())
            module = _load_module_from_file(file_path=temp_file_path)

        pydantic_model = module.__dict__[main_model_name]
        # Override the pydantic model/parser name for nicer ValidationError messaging and logging
        pydantic_model.__name__ = name_override
        # NOTE(review): ``parse_obj.__func__`` is presumably the function object
        # inherited from BaseModel, in which case this rename is visible on every
        # model, not just this one - confirm that this is intended.
        pydantic_model.parse_obj.__func__.__name__ = name_override
        return pydantic_model
    

    The main drawback, as I see it, is that datamodel-code-generator has isort and black as non-dev (runtime) dependencies, which is not ideal to have in your deployments.