I have a bunch of data classes that I'm trying to build from a dictionary of parsed XML. The keys in this XML have been obfuscated, so I've had to go through tedious amounts of documentation, written in a language I have only passing familiarity with, to be able to construct this structure. Right now, I have something like this:
@dataclass
class Record:
    """One record built from a dictionary of parsed XML.

    The XML keys are opaque codes; ``from_xml`` maps each code to a named
    field of this dataclass.
    """

    point_id: str
    generator_id: str
    generator_name: str
    control: str
    success: bool
    amount: int
    notes: str

    @classmethod
    def from_xml(cls, xml: dict) -> "Record":
        """Build a record from a parsed-XML dictionary.

        Args:
            xml: Mapping from XML key codes to their raw values.

        Returns:
            A new instance of ``cls`` populated from ``xml``.

        Raises:
            KeyError: If ``JP06400``, ``JP06121``, ``JP06122`` or ``JP06123``
                is absent from ``xml``.
            ValueError: If the ``JP06123`` value cannot be converted by
                ``int``.
        """
        # NOTE(review): value_or_empty is defined outside this block;
        # presumably it tolerates a missing key -- confirm against its
        # definition.
        return cls(
            point_id=xml["JP06400"],
            generator_id=value_or_empty(xml, "JP06119"),
            generator_name=value_or_empty(xml, "JP06120"),
            control=xml["JP06121"],
            # The record counts as successful when the raw value is "0".
            success=xml["JP06122"] == "0",
            amount=int(xml["JP06123"]),
            notes=value_or_empty(xml, "JP06124"),
        )
Needless to say, doing this for every class is tedious, so I thought of handling this automatically through a system of decorators:
def parse_xml(cls):
    """Class decorator that wraps ``cls`` in a delegating proxy class.

    Args:
        cls: The class to wrap. Its constructor receives whatever arguments
            are passed to the returned wrapper class.

    Returns:
        A ``Wrapper`` class whose instances hold one ``cls`` instance,
        forward unknown attribute lookups to it, and expose a ``parse_xml``
        method that runs every callable stored in ``commands``.
    """

    class Wrapper:
        """Proxy around a single ``cls`` instance plus its parse commands."""

        def __init__(self, *args, **kwargs):
            self._inst = cls(*args, **kwargs)
            # Callables invoked as ``command(instance, xml)``. The list
            # starts empty; nothing in this block populates it.
            self.commands = []

        def __getattr__(self, attrib):
            """Forward lookups that failed on the wrapper to the instance."""
            # __getattr__ only runs after normal lookup has failed, so the
            # wrapper's own attributes always take precedence.
            if attrib == "_inst":
                # _inst itself is missing (e.g. __init__ never ran);
                # delegating would recurse forever.
                raise AttributeError(attrib)
            # getattr (rather than calling __getattribute__ directly) also
            # honours a __getattr__ defined by the wrapped class.
            return getattr(self._inst, attrib)

        def parse_xml(self, xml):
            """Apply every registered command to the wrapped instance."""
            for command in self.commands:
                command(self._inst, xml)

    return Wrapper
The only thing this is missing is a decorator that could be added to each field I want to set in the `parse_xml` method. Ideally, this decorator would create a setter for the data-class property and add it to `self.commands`. However, I'm not sure how to do that. How do I create a property-level decorator that can modify a value in a class-level decorator?
I would approach this problem by using descriptors.
At their most basic, descriptors let you customize how you get, set, and delete attributes. Because descriptors are objects living on the class (not the instance), you can also influence deeper behavior, for example, automated value extraction from a data source.
I've made some assumptions about how you're piping in the XML data, but the following should be a decent starting point for your project. Please note that this does not use the `@dataclass` decorator, as that's not really built for custom class attribute types, but we can make dataclass-like objects all the same.
from typing import Any, Callable, List, Optional
class XmlProperty:
    """Descriptor that extracts one value from parsed XML data.

    The descriptor lives on the class; each parsed value is stored on the
    owning instance under a private attribute named after the field with a
    leading underscore.
    """

    def __init__(
        self,
        xml_key: str,
        dtype: Callable[[Any], Any],
        allow_empty: bool = False,
    ):
        """Constructor.

        Args:
            xml_key: The key in the XML data corresponding to the property.
            dtype: Callable used to cast the raw parsed value (a type such
                as ``int``, or any one-argument function).
            allow_empty: True if an empty/missing value is acceptable.
        """
        self._xml_key = xml_key
        self._dtype = dtype
        self._allow_empty = allow_empty

    def parse(self, obj: object, data: dict):
        """Parses XML data and populates the property on ``obj``.

        Args:
            obj: The parent object.
            data: The XML data to parse.

        Raises:
            KeyError: If the key is missing and empty values are not allowed.
        """
        value = None
        if self._xml_key in data:
            raw_value = data[self._xml_key]
            # An explicitly empty (None) entry counts as "empty" when that is
            # allowed; casting it would yield junk such as str(None) == "None".
            if raw_value is not None or not self._allow_empty:
                value = self._dtype(raw_value)
        elif not self._allow_empty:
            raise KeyError(f"XML data is missing property {self._xml_key}")
        self.__set__(obj, value)

    def __set_name__(self, owner: type, name: str):
        """Records the attribute name the descriptor was assigned to."""
        self.public_name = name
        self.private_name = "_" + name

    def __get__(self, obj: object, objtype: Optional[type] = None) -> Any:
        """Gets the value stored on ``obj``, or the descriptor itself."""
        if obj is None:
            # Accessed on the class rather than an instance: hand back the
            # descriptor object, not a contained value.
            return self
        return getattr(obj, self.private_name)

    def __set__(self, obj: object, value: Any):
        """Sets the descriptor's value on ``obj``."""
        setattr(obj, self.private_name, value)
class XmlRecord:
    """Base class for all XML data classes."""

    def __init__(self, data: dict):
        """Constructor.

        Collects every ``XmlProperty`` declared on the class or any of its
        base classes, then asks each one to parse its value out of ``data``.

        Args:
            data: The data to parse.
        """
        xml_properties = {}
        # Walk the MRO base-first so inherited fields are included and come
        # before the subclass's own; looking only at the concrete class's
        # __dict__ would miss fields declared on a parent record class.
        for klass in reversed(type(self).__mro__):
            for name, attr in vars(klass).items():
                if isinstance(attr, XmlProperty):
                    xml_properties[name] = attr
                else:
                    # A non-property attribute of the same name in a more
                    # derived class hides the inherited descriptor.
                    xml_properties.pop(name, None)
        self._xml_properties = xml_properties
        for xml_property in self._xml_properties.values():
            xml_property.parse(self, data)

    def __repr__(self) -> str:
        """Returns a descriptive string representation of the instance."""
        cls = self.__class__.__name__
        description = ", ".join(
            f"{name}={getattr(self, name)}" for name in self._xml_properties
        )
        return f"{cls}({description})"
class BasicRecord(XmlRecord):
    """Example record: two fields that must be present, one that may not be."""

    number: float = XmlProperty(xml_key="J1000", dtype=float)
    integer: int = XmlProperty(xml_key="F50", dtype=int)
    maybe_string: str = XmlProperty(xml_key="S55", dtype=str, allow_empty=True)


# Create new data classes as needed
# Every key is present and already has the expected type.
all_data = dict(J1000=5.12, F50=3, S55="block_3")

first = BasicRecord(all_data)
print(first)
# BasicRecord(number=5.12, integer=3, maybe_string=block_3)

# Each XmlProperty also casts what it parses, so compatible values are
# converted to the expected type.
messy_data = dict(J1000=67, F50=12.0)

second = BasicRecord(messy_data)
print(second)
# BasicRecord(number=67.0, integer=12, maybe_string=None)
EDIT: You also get some handy benefits from casting extracted data types, so you can easily represent nested data as well.
class Location(XmlRecord):
    """Record with a state (key ``S1``) and a city (key ``S2``)."""

    state: str = XmlProperty(xml_key="S1", dtype=str)
    city: str = XmlProperty(xml_key="S2", dtype=str)
class Employee(XmlRecord):
    """Record with a first name (key ``N1``) and a last name (key ``N2``)."""

    first_name: str = XmlProperty(xml_key="N1", dtype=str)
    last_name: str = XmlProperty(xml_key="N2", dtype=str)
def employee_list(data: List[dict]) -> List[Employee]:
    """Build one ``Employee`` from each dictionary in ``data``, in order."""
    return list(map(Employee, data))
class CompanyData(XmlRecord):
    """Record that nests other records by using them as field casters."""

    name: str = XmlProperty(xml_key="CC68", dtype=str)
    slogan: str = XmlProperty(xml_key="S57M", dtype=str)
    # The raw value under "L56" is handed to Location, so it is parsed as a
    # nested record.
    location: Location = XmlProperty(xml_key="L56", dtype=Location)
    # The raw value under "CE3" is handed to employee_list, which builds one
    # Employee per item.
    employees: List[Employee] = XmlProperty(xml_key="CE3", dtype=employee_list)
# Nested input: "L56" holds one sub-record, "CE3" holds a list of them.
data = dict(
    CC68="Net Technologies",
    S57M="Making real connections",
    L56=dict(S1="Colorado", S2="Boulder"),
    CE3=[
        dict(N1="Jim", N2="Thompson"),
        dict(N1="Annie", N2="Peterson"),
    ],
)

company = CompanyData(data)
print(company)
# CompanyData(name=Net Technologies, slogan=Making real connections, location=Location(state=Colorado, city=Boulder), employees=[Employee(first_name=Jim, last_name=Thompson), Employee(first_name=Annie, last_name=Peterson)])