I'm currently building large xml files with xml.dom.minidom
and then writing them out to file via the toprettyxml
. is there a way to stream the xml to a document because I'm hitting memory errors.
def run(self):
while True:
domain = self.queue.get()
try:
conn = boto.connect_sdb(awsa, awss)
sdbdomain = conn.get_domain(domain)
s3conn = boto.connect_s3(awsa, awss)
archbucket = s3conn.get_bucket("simpledbbu")
doc = None
doc = Document()
root = doc.createElement("items")
doc.appendChild(root)
countermax = 0
counter = 0
for item in sdbdomain:
node = doc.createElement("item")
node.setAttribute("itemName", item.name)
for k,v in item.items():
if not isinstance(v, basestring):
i = 0
for val in v:
node.setAttribute("{0}::{1}".format(k,i),val)
i += 1
else:
node.setAttribute(k,v)
root.appendChild(node)
k = Key(archbucket)
k.key = "{0}/{1}.xml".format(datetime.date.today().strftime("%Y%m%d"),sdbdomain.name)
#x = doc.toprettyxml(indent=" ")
f = open(domain + ".xml", "w")
f.truncate()
f.write(doc.toprettyxml(indent=" "))
f.close()
#k.content_type.encode('ascii')
k.set_contents_from_filename(f.name)
os.remove(os.path.join(os.getcwd(),f.name))
except:
print "failed to load domain: {0}".format(domain)
print formatExceptionInfo()
finally:
self.queue.task_done()
building large xml files with xml.dom.minidom and then writing them out to file via the toprettyxml.
If you run out of memory you should probably stop doing that.
You can build XML with simple string manipulation.
with open(domain + ".xml", "w") as f:
f.write( "<?xml..." )
f.write( "<items>" )
for item in sdbdomain:
buffer= []
for k,v in item.items():
if not isinstance(v, basestring):
for i, val in enumerate(v):
txt= '{0}::{1}="{2}"'.format(k,i,val)
else:
txt= '{0}="{1}"'.format(k,v)
buffer.append( txt )
f.write( " <item {0}/>\n".format( " ".join(buffer) ))
f.write( "</items>" )
k= ................
k.set_contents_from_filename(f.name)
Something like that ought to allow you to write the XML to a temporary file without making a large DOM object in memory.