So I am working on a browser based program, written in Python, that parses XML data from multiple files in a directory, then returns the values of certain XML tags on the page. I have successfully been able to return the values from one of the XML files, but am hoping to collect data from every file within the directory and return the values in spreadsheet format. How do I parse the data from every XML file? Also, the XML files are not static, there will be new files coming and going. Thanks! Below is my code:
from xml.dom.minidom import parseString
import os
path = 'C:\Vestigo\XML'
listing = os.listdir(path)
for infile in listing:
print infile
file = open(os.path.join(path,infile),'r')
data = file.read()
file.close()
dom = parseString(data)
xmlTag0 = dom.getElementsByTagName('Extrinsic')[0].toxml()
xmlData0 = xmlTag0.replace('<Extrinsic>','').replace('</Extrinsic>','')
xmlTag1 = dom.getElementsByTagName('DeliverTo')[0].toxml()
xmlData1 = xmlTag1.replace('<DeliverTo>','').replace('</DeliverTo>','')
xmlTag2 = dom.getElementsByTagName('Street1')[0].toxml()
xmlData2 = xmlTag2.replace('<Street1>','').replace('</Street1>','')
xmlTag3 = dom.getElementsByTagName('City')[0].toxml()
xmlData3 = xmlTag3.replace('<City>','').replace('</City>','')
xmlTag4 = dom.getElementsByTagName('State')[0].toxml()
xmlData4 = xmlTag4.replace('<State>','').replace('</State>','')
xmlTag5 = dom.getElementsByTagName('PostalCode')[0].toxml()
xmlData5 = xmlTag5.replace('<PostalCode>','').replace('</PostalCode>','')
import cherrypy
class Root(object):
def index(self):
return ('Order Number:', ' ', xmlData0, '<br>Name: ', xmlData1, '<br>Street Address: ', xmlData2, '<br>City/State/Zip: ', xmlData3, ', ', xmlData4, ' ', xmlData5, ' ', """<br><br><a href="/exit">Quit</a>""")
index.exposed = True
def exit(self):
raise SystemExit(0)
exit.exposed = True
def start():
import webbrowser
cherrypy.tree.mount(Root(), '/')
cherrypy.engine.start_with_callback(
webbrowser.open,
('http://localhost:8080/',),
)
cherrypy.engine.block()
if __name__=='__main__':
start()
EDIT: Updated with my solution below.
In order to pull data from every file in the directory I used this code below:
from xml.dom.minidom import parse, parseString
import os, glob, re
import cherrypy
class Root(object):
def index(self):
path = 'C:\Vestigo\XML'
TOTALXML = len(glob.glob(os.path.join(path, '*.xml')))
print TOTALXML
i = 0
for XMLFile in glob.glob(os.path.join(path, '*.xml')):
xmldoc = parse(XMLFile)
order_number = xmldoc.getElementsByTagName('Extrinsic')[0].firstChild.data
order_name = xmldoc.getElementsByTagName('DeliverTo')[0].firstChild.data
street1 = xmldoc.getElementsByTagName('Street1')[0].firstChild.data
state = xmldoc.getElementsByTagName('State')[0].firstChild.data
zip_code = xmldoc.getElementsByTagName('PostalCode')[0].firstChild.data
OUTPUTi = order_number+' '+order_name+' '+street1+' '+state+' '+zip_code
i += 1
print OUTPUTi
return (OUTPUTi, """<br><br><a href="/exit">Quit</a>""")
index.exposed = True
def exit(self):
raise SystemExit(0)
exit.exposed = True
def start():
import webbrowser
cherrypy.tree.mount(Root(), '/')
cherrypy.engine.start_with_callback(
webbrowser.open,
('http://localhost:8080/',),
)
cherrypy.engine.block()
if __name__=='__main__':
start()
Thanks for your help everyone, and for the tip on answering my own question Sheena!