I am using dacite to transform a Python dictionary into a dataclass. Is there a way to dynamically add fields to a dataclass? Like in the example below, where the dataclass "Parameters" has defined only one timeseries "timeseriesA", but there might be additional ones (provided through the dictionary) that cannot be declared.
from dataclasses import asdict, dataclass
from typing import Dict, List, Optional
from dacite import from_dict
@dataclass(frozen=True)
class TimeSeries:
    """Immutable record describing a single named time series."""

    # Identifier of the series (e.g. 'nameA' in the sample payload).
    name: str
    # Unit label attached to the values (e.g. 'USD').
    unit: str
    # The sample values; the annotation also permits None.
    data: Optional[List[float]]
@dataclass(frozen=True)
class Parameters:
    """Immutable group of time series declared up front as fields."""

    # The only series declared statically on this class.
    timeseriesA: TimeSeries
@dataclass(frozen=True)
class Data:
    """Immutable top-level container wrapping the ``parameters`` section."""

    parameters: Parameters

    @classmethod
    def fromDict(cls, data: Dict) -> 'Data':
        """Build an instance from a plain dictionary using dacite's ``from_dict``."""
        return from_dict(cls, data)

    def toDict(self) -> Dict:
        """Return this instance converted to a plain dictionary.

        This is an instance method on purpose: ``dataclasses.asdict`` only
        accepts dataclass *instances* and raises ``TypeError`` when it is
        handed the class object itself.
        """
        return asdict(self)
def main() -> None:
    """Parse a sample payload that carries one declared and one extra series."""
    series_a: Dict = {
        'name': 'nameA',
        'unit': 'USD',
        'data': [10, 20, 30, 40],
    }
    # 'timeseriesB' has no matching field on the Parameters dataclass.
    series_b: Dict = {
        'name': 'nameB',
        'unit': 'EUR',
        'data': [60, 30, 40, 50],
    }
    payload: Dict = {
        'parameters': {
            'timeseriesA': series_a,
            'timeseriesB': series_b,
        },
    }
    parsed: Data = Data.fromDict(payload)


if __name__ == '__main__':
    main()
In this example, "timeseriesB" will be ignored by dacite, but it should be added as a field of the "Parameters" dataclass.
In general, dynamically adding fields to a dataclass after the class is defined is not good practice. However, this is a good use case for a `dict` field within a dataclass, because the set of keys in the source `dict` object is dynamic.
Here is a straightforward example of using a `dict` field to handle a dynamic mapping of keys in the source object. It uses `dataclass-wizard`, which is a similar JSON serialization library. The approach outlined below handles extraneous data in the `dict` object, such as `timeseriesB`.
from __future__ import annotations
from dataclasses import dataclass
from dataclass_wizard import JSONWizard
@dataclass(frozen=True)
class Data(JSONWizard):
    """Immutable container whose series are keyed by name rather than fixed fields."""

    # Maps each series key from the input (e.g. 'timeseriesA') to its TimeSeries.
    # TimeSeries is defined further down; the postponed annotations enabled by
    # `from __future__ import annotations` make this forward reference valid.
    parameters: dict[str, TimeSeries]
@dataclass(frozen=True)
class TimeSeries:
    """Immutable record describing a single named time series."""

    # Identifier of the series (e.g. 'nameA' in the sample payload).
    name: str
    # Unit label attached to the values (e.g. 'USD').
    unit: str
    # The sample values; the annotation also permits None.
    data: list[float] | None
# Sample input: two series under 'parameters', each with a name, unit and values.
data: dict = {
    'parameters': {
        'timeseriesA': {'name': 'nameA', 'unit': 'USD', 'data': [10, 20, 30, 40]},
        'timeseriesB': {'name': 'nameB', 'unit': 'EUR', 'data': [60, 30, 40, 50]},
    },
}
def main():
    """Load the sample payload and show that every series key is available."""
    # Turn the plain dict into a Data instance.
    parsed = Data.from_dict(data)

    series_b = parsed.parameters['timeseriesB']
    print(series_b.unit)  # EUR

    print(repr(parsed))
    # Data(parameters={'timeseriesA': TimeSeries(name='nameA', unit='USD', data=[10.0, 20.0, 30.0, 40.0]),
    #                  'timeseriesB': TimeSeries(name='nameB', unit='EUR', data=[60.0, 30.0, 40.0, 50.0])})


if __name__ == '__main__':
    main()
Admittedly, `dataclass-wizard` doesn't perform strict type checking like `dacite` does; instead it performs implicit type coercion where possible, such as converting a `str` to an annotated `int`. Perhaps as a result, it is much faster overall; the other nice thing is that serialization is even slightly faster than the builtin `dataclasses.asdict` too :-)
Here are some quick tests:
from dataclasses import asdict, dataclass
from typing import Dict, List, Optional
from dacite import from_dict
from dataclass_wizard import JSONWizard
from timeit import timeit
@dataclass(frozen=True)
class TimeSeries:
    """Immutable record describing a single named time series."""

    # Identifier of the series (e.g. 'nameA' in the sample payload).
    name: str
    # Unit label attached to the values (e.g. 'USD').
    unit: str
    # The sample values; the annotation also permits None.
    data: Optional[List[float]]
@dataclass(frozen=True)
class Parameters:
    """Immutable group of time series declared up front as fields."""

    # The only series declared statically on this class.
    timeseriesA: TimeSeries
@dataclass(frozen=True)
class Data:
    """Immutable top-level container converted to and from dicts with dacite / asdict."""

    parameters: Parameters

    @classmethod
    def fromDict(cls, data: Dict) -> 'Data':
        """Build an instance from a plain dictionary using dacite's ``from_dict``."""
        return from_dict(data_class=cls, data=data)

    def toDict(self) -> Dict:
        """Return this instance converted to a plain dictionary."""
        as_mapping: Dict = asdict(self)
        return as_mapping
@dataclass(frozen=True)
class ParametersWizard:
    """Parameters variant whose field name follows dataclass-wizard's key transform."""

    # Named in snake_case because the default key transform maps
    # `camelCase` input keys to `snake_case` field names.
    timeseries_a: TimeSeries
@dataclass(frozen=True)
class DataWizard(JSONWizard):
    """dataclass-wizard counterpart of ``Data`` used in the timing comparison."""

    class _(JSONWizard.Meta):
        # Debug mode is switched on to help with problems such as incorrect types.
        debug_enabled = True

    parameters: ParametersWizard
# Sample input: two series under 'parameters', each with a name, unit and values.
data: Dict = {
    'parameters': {
        'timeseriesA': {'name': 'nameA', 'unit': 'USD', 'data': [10, 20, 30, 40]},
        'timeseriesB': {'name': 'nameB', 'unit': 'EUR', 'data': [60, 30, 40, 50]},
    },
}
def main():
    """Time dict -> dataclass and dataclass -> dict for dacite and dataclass-wizard.

    Each measurement runs the snippet ``n`` times with ``timeit`` and prints
    the total elapsed seconds rounded to three decimals.
    """
    n = 10_000  # repetitions per measurement

    # Deserialization: the snippets only need module-level names.
    print(f"From Dict: {timeit('Data.fromDict(data)', globals=globals(), number=n):.3f}")
    print(f"From Dict (Wiz): {timeit('DataWizard.from_dict(data)', globals=globals(), number=n):.3f}")

    data_1: Data = Data.fromDict(data)
    # Annotated as DataWizard: this value comes from DataWizard.from_dict, not Data.
    data_wiz: DataWizard = DataWizard.from_dict(data)

    # Serialization: the snippets refer to the two locals above, so expose the
    # function's locals alongside the module globals in one namespace.
    g = {**globals(), **locals()}

    print(f"To Dict: {timeit('data_1.toDict()', globals=g, number=n):.3f}")
    print(f"To Dict (Wiz): {timeit('data_wiz.to_dict()', globals=g, number=n):.3f}")


if __name__ == '__main__':
    main()
Results, on my PC (Windows):
From Dict: 1.663
From Dict (Wiz): 0.059
To Dict: 0.105
To Dict (Wiz): 0.057