I am working on a project where I need objects with a few attributes (e.g. date, name) and then a collection of values (normally pd.DataFrames with different aggregates, but I will use integers as an example here). Instead of doing multiple nested dicts and typing many extra characters for the keys like data["df"]["yearly"], it would be easier to access the values with data.df.yearly. (is there a better structure?)
Thus I use a class which sets the keys as attributes. But I get different outputs depending on how I access that data.
Here is the code:
class Map(dict):
"""
https://stackoverflow.com/questions/2352181/how-to-use-a-dot-to-access-members-of-dictionary
Example:
m = Map({'first_name': 'Eduardo'}, last_name='Pool', age=24, sports=['Soccer'])
"""
def __init__(self, *args, **kwargs):
super(Map, self).__init__(*args, **kwargs)
for arg in args:
if isinstance(arg, dict):
for k, v in arg.items():
self[k] = v
if kwargs:
for k, v in kwargs.items():
self[k] = v
def __getattr__(self, attr):
super( Map, self ).__getattr__( attr )
return self.get(attr)
def __setattr__(self, key, value):
self.__setitem__(key, value)
def __setitem__( self, key, value ):
super( Map, self ).__setitem__( key, value )
if isinstance( value, dict ):
self.__dict__.update( {key: Map(value) } )
else:
self.__dict__.update( {key: value} )
def __delattr__(self, item):
self.__delitem__(item)
def __delitem__(self, key):
super(Map, self).__delitem__(key)
del self.__dict__[key]
Now when I initialize an instance and assign some data to it in the following way, I will have two different outputs depending on if I write data.df.yearly or data.df["yearly"].
Does someone see the mistake (or is there a suggestion for a better data structure?).
Create an instance of Map()
data = Map()
data.start_date = "2020-01-01"
data.finish_date = '2021-01-01'
data.name = "Type A"
data.df = {"yearly" : 0, "monthly" : 0, "weekly" : 0}
Change the values of data.df
for time in data.df.keys():
data.df[time] = 123
Expected output:
data
> {'start_date': '2020-01-01',
> 'finish_date': '2021-01-01',
> 'name': 'Type A',
> 'df': {'yearly': 123, 'monthly': 123, 'weekly': 123}}
Output (same with print(data)):
data
> {'start_date': '2020-01-01',
> 'finish_date': '2021-01-01',
> 'name': 'Type A',
> 'df': {'yearly': 0, 'monthly': 0, 'weekly': 0}}
Output via dict matches epected output:
data.__dict__
> {'start_date': '2020-01-01',
> 'finish_date': '2021-01-01',
> 'name': 'Type A',
> 'df': {'yearly': 123, 'monthly': 123, 'weekly': 123}}
Your __setitem__
method is calling the superclass method with the original value, but if the value if a dictionary it converts it to a Map
when putting it into self.__dict__
. So these two locations contain different objects, and nothing keeps them in sync.
You should convert the value to a Map
before storing it in both places.
def __setitem__( self, key, value ):
if isinstance(value, dict):
value = Map(value)
super( Map, self ).__setitem__( key, value )
self.__dict__.update({key: value})