I have this primary schema for each one of my data points:
# JSON Schema for a single data point: a JSON object whose listed properties
# must have the given types when they are present (no "required" list is
# declared, so none of the properties is mandatory).
data_schema = {
"type": "object",
"properties": {
"name": {"type": "string"},
"city": {"type": "string"},
"distance": {"type": "number"},
"extrovert": {"type": "boolean"},
"favorite_temperature": {"type": "number"},
},
}
I then expect a list of these objects, called `data`. I want to validate them all at once; how can this be done using the `jsonschema` module?
This is my attempt which fails because it seems to not recognize my custom primary schema inside the array schema:
from jsonschema import validate
from time import time
from faker import Faker
import numpy as np
# Seed Faker's shared random generator so the fake values it produces are
# repeatable from run to run.
# NOTE(review): NumPy's RNG is not seeded anywhere in the visible code, so the
# 'distance' values drawn with np.random.randint may still differ between runs.
Faker.seed(0)
# Module-level Faker instance used by create_data().
fake = Faker()
def create_data(x: int):
    """Return a list of ``x`` randomly generated fake records.

    Every record is a dict with the keys ``name``, ``city``, ``distance``,
    ``extrovert`` and ``favorite_temperature``. ``distance`` is drawn with
    ``np.random.randint(1, 5)``, whose upper bound is exclusive.
    """
    records = []
    for _ in range(x):
        # Dict values are evaluated top to bottom, so the generators are
        # called in the same order as the keys are listed.
        record = {
            'name': fake.name(),
            'city': fake.city(),
            'distance': np.random.randint(1, 5),
            'extrovert': fake.pybool(),
            'favorite_temperature': fake.pyfloat(
                left_digits=2,
                right_digits=2,
            ),
        }
        records.append(record)
    return records
# Generate a small sample and time how long it takes to validate it.
data = create_data(3)
t0 = time()

# Schema for ONE record: an object whose listed properties must have the given
# types when present (nothing is marked "required").
data_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "city": {"type": "string"},
        "distance": {"type": "number"},
        "extrovert": {"type": "boolean"},
        "favorite_temperature": {"type": "number"},
    },
}

# Schema for the whole list. "items" takes the per-element schema directly.
# Wrapping it as {"type": data_schema} is invalid: "type" must be a type name
# (or a list of type names) such as "object", so jsonschema rejects the schema
# itself with a SchemaError before any data is checked.
list_schema = {
    "type": "array",
    "items": data_schema,
}

# validate() returns None on success and raises
# jsonschema.exceptions.ValidationError on the first failure, so
# `validated_data` is always None here; the name is kept for compatibility.
validated_data = validate(instance=data, schema=list_schema)
t1 = time()
duration = t1 - t0
print(f"Json_Schema validation lasted: {duration} sec")
This is the error I get:
jsonschema.exceptions.SchemaError: {'type': 'object', 'properties': {'name': {'type': 'string'}, 'city': {'type': 'string'}, 'closeness (1-5)': {'type': 'number'}, 'extrovert': {'type': 'boolean'}, 'favorite_temperature': {'type': 'number'}}} is not valid under any of the given schemas
"items": {"type": data_schema}
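should just be `"items": data_schema`. The `items` keyword expects a schema, so the per-record schema is passed to it directly. The `type` keyword, by contrast, only accepts a type name (or a list of type names) such as `"object"`; giving it a whole schema dict makes `list_schema` itself invalid, which is why a `SchemaError` is raised instead of a validation error.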