Search code examples
python, python-collections

Why am I getting appending behavior in a recursive method?


I am trying to generate XML with xmltodict. To do this I need to generate structures based on OrderedDict. I get the expected behavior the first time I execute my ".xml()" method, but when I execute it a second time the child elements are added a second time.

from collections import OrderedDict
import xmltodict

# Base class for a tree of XML elements that is turned into nested
# OrderedDicts and then serialized with xmltodict.unparse().
# Subclasses are expected to define a class attribute __tag__ (the element
# name); XMLObject itself does not define one.
class XMLObject(object):
    # Namespace prefix; an empty string means tagname carries no prefix.
    __ns__ = ''
    # parent: optional XMLObject this element belongs to.
    # **kwargs is accepted but not used anywhere in this initializer.
    def __init__(self, parent=None, **kwargs):
        self.parent = None
        if parent:
            # Inherit the namespace prefix from the parent element.
            self.__ns__ = parent.__ns__
            self.parent = parent
        # Per-instance mapping accessed through obj[key]; object() also writes
        # child entries into it (see below).
        self._attrib = OrderedDict()
        # Child XMLObject instances, in document order.
        self.children = []

    # obj[key] = value stores the value in self._attrib.
    def __setitem__(self, key, value):
        self._attrib[key] = value

    # obj[item] reads from self._attrib; a missing key raises KeyError.
    def __getitem__(self, item):
        return self._attrib[item]

    # Element name: "<__ns__>:<__tag__>" when a namespace prefix is set,
    # otherwise just __tag__.
    @property
    def tagname(self):
        return (self.__ns__ + ':' + self.__tag__) if self.__ns__ else self.__tag__

    # Build and return an OrderedDict of the form {tagname: contents} for this
    # element and, recursively, its children. The print statements are debug
    # output (id of the new dict, the tag, the children list, each child tag).
    def object(self):
        store = OrderedDict()
        print id(store)
        tag = self.tagname
        print tag
        # store[tag] is self._attrib itself, not a copy: every write to
        # store[tag] below is also a write to self._attrib.
        store[tag] = self._attrib
        print self.children
        for child in self.children:
            print "-" + child.tagname
            # Children are grouped in a list under their tag name.
            if not child.tagname in store[tag]:
                store[tag][child.tagname] = list()
            # Because the list lives in self._attrib, it is still there on the
            # next call to object(), and the child gets appended to it again.
            store[tag][child.tagname].append(child.object()[child.tagname])
        return store

    # Serialize this element to an XML string via xmltodict.unparse().
    # The pretty and clean parameters are accepted but not used.
    def xml(self, pretty=False, clean=False):
        xml = xmltodict.unparse(self.object())
        return xml


class A(XMLObject):
    # Element tagged 'A'; every new instance starts out with exactly one B child.
    __tag__ = 'A'

    def __init__(self, *args, **kwargs):
        # Let the base initializer set up parent, namespace, _attrib and children.
        super(A, self).__init__(*args, **kwargs)
        # Replace the empty children list from the base class with a single B
        # whose parent is this instance.
        first_child = B(parent=self)
        self.children = [first_child]


# Element tagged 'B'; everything else is inherited unchanged from XMLObject.
class B(XMLObject):
    __tag__ = 'B'

if __name__ == '__main__':
    obj = A()
    print obj.xml()
    print obj.xml()
    print obj.xml()

In the IPython console, these are the results.

In [1]: from question import *

In [2]: obj = A()

In [3]: obj.xml()
35393280
A
[<question.B object at 0x7f3bc226f890>]
-B
35394720
B
[]
Out[3]: u'<?xml version="1.0" encoding="utf-8"?>\n<A><B></B></A>'

In [4]: obj.xml()
35487136
A
[<question.B object at 0x7f3bc226f890>]
-B
35487936
B
[]
Out[4]: u'<?xml version="1.0" encoding="utf-8"?>\n<A><B></B><B></B></A>'

In [5]: obj.xml()
35487136
A
[<question.B object at 0x7f3bc226f890>]
-B
35488896
B
[]
Out[5]: u'<?xml version="1.0" encoding="utf-8"?>\n<A><B></B><B></B><B></B></A>'

Solution

  • You are storing a reference to self._attrib in your OrderedDict():

    store[tag] = self._attrib
    

    This is not a copy; you are sharing the instance attribute with the store object here.

    So when you are later adding to the dictionary, you are altering the same object as what self._attrib is referring to:

    if not child.tagname in store[tag]:
        store[tag][child.tagname] = list()
    store[tag][child.tagname].append(child.object()[child.tagname])
    

    You don't reset this state; self._attrib remains altered when A.xml() returns.

    You most likely wanted to use a copy of the self._attrib object there:

    store[tag] = self._attrib.copy()
    

    This produces a shallow copy; any references to mutable objects in self._attrib are just copied across. Use copy.deepcopy() instead if you need a recursive deep copy.