I have a use case where users need to define a JSON Schema for later use. Right now I am using Pydantic to parse the user configs and check that they are OK.
Does anyone know of a library that provides a Pydantic model for the JSON meta-schema? Even better would be one that can parse a JSON Schema into a Pydantic model at runtime.
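For concreteness, this is roughly the sort of API I am hoping for (json_schema_to_model is a hypothetical function, not a real library call, and the example schema is made up):

user_schema = {
    "title": "UserConfig",
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "retries": {"type": "integer"},
    },
    "required": ["name"],
}

UserConfig = json_schema_to_model(user_schema)  # hypothetical
UserConfig.parse_obj({"name": "my-service", "retries": 3})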
One solution is to hack the utils out of datamodel-code-generator, specifically its JsonSchemaParser. This generates an intermediate text representation of all the Pydantic models, which you can then dynamically import. You might reasonably balk at this, but it does at least allow for self-referencing and multi-model setups:
import importlib.util
import json
import re
import sys
from contextlib import contextmanager
from pathlib import Path
from tempfile import NamedTemporaryFile
from types import ModuleType
from datamodel_code_generator.parser.jsonschema import JsonSchemaParser
from pydantic import BaseModel
NON_ALPHANUMERIC = re.compile(r"[^a-zA-Z0-9]+")
UPPER_CAMEL_CASE = re.compile(r"[A-Z][a-zA-Z0-9]+")
LOWER_CAMEL_CASE = re.compile(r"[a-z][a-zA-Z0-9]+")
class BadJsonSchema(Exception):
    pass


def _to_camel_case(name: str) -> str:
    # Normalise the schema title into the UpperCamelCase class name that
    # datamodel-code-generator produces for the top-level model.
    if any(NON_ALPHANUMERIC.finditer(name)):
        return "".join(term.lower().title() for term in NON_ALPHANUMERIC.split(name))
    if UPPER_CAMEL_CASE.match(name):
        return name
    if LOWER_CAMEL_CASE.match(name):
        return name[0].upper() + name[1:]
    raise BadJsonSchema(f"Unknown case used for {name}")


def _load_module_from_file(file_path: Path) -> ModuleType:
    # Import the generated .py file as a regular module
    spec = importlib.util.spec_from_file_location(
        name=file_path.stem, location=str(file_path)
    )
    module = importlib.util.module_from_spec(spec)
    sys.modules[file_path.stem] = module
    spec.loader.exec_module(module)
    return module


@contextmanager
def _delete_file_on_completion(file_path: Path):
    # Clean up the temporary file once we are done with it
    try:
        yield
    finally:
        file_path.unlink(missing_ok=True)


def json_schema_to_pydantic_model(json_schema: dict, name_override: str) -> BaseModel:
    # Generate the Pydantic model source code from the JSON Schema
    json_schema_as_str = json.dumps(json_schema)
    pydantic_models_as_str: str = JsonSchemaParser(json_schema_as_str).parse()

    # Write the generated source to a temporary file so it can be imported
    with NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
        temp_file_path = Path(temp_file.name).resolve()
        temp_file.write(pydantic_models_as_str.encode())

    with _delete_file_on_completion(file_path=temp_file_path):
        module = _load_module_from_file(file_path=temp_file_path)

    # The top-level model is named after the schema's "title"
    main_model_name = _to_camel_case(name=json_schema["title"])
    pydantic_model: BaseModel = module.__dict__[main_model_name]

    # Override the pydantic model/parser name for nicer ValidationError messaging and logging
    pydantic_model.__name__ = name_override
    pydantic_model.parse_obj.__func__.__name__ = name_override

    return pydantic_model
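For illustration, usage would look roughly like this (an untested sketch; the example schema and values are made up):

address_schema = {
    "title": "Address",
    "type": "object",
    "properties": {
        "street": {"type": "string"},
        "city": {"type": "string"},
    },
    "required": ["street", "city"],
}

Address = json_schema_to_pydantic_model(address_schema, name_override="Address")
address = Address.parse_obj({"street": "1 Main St", "city": "Springfield"})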
The main drawback as I see it: datamodel-code-generator has non-dev dependencies on isort and black, which are not ideal to have in your deployments.