Search code examples
python xml cherrypy

Return XML Values from Multiple Files in Python?


I am working on a browser-based program, written in Python, that parses XML data from multiple files in a directory and then returns the values of certain XML tags on the page. I have been able to return the values from one of the XML files, but I am hoping to collect data from every file within the directory and return the values in spreadsheet format. How do I parse the data from every XML file? Also, the XML files are not static; new files will be coming and going. Thanks! Below is my code:

from xml.dom.minidom import parseString

import os
# Directory whose entries are listed below. '\V' and '\X' are not recognised
# escape sequences, so the backslashes end up literal in the string.
path = 'C:\Vestigo\XML'
listing = os.listdir(path)
# NOTE: the loop body is only the print statement. Everything after it is at
# module level and therefore runs once, after the loop has finished.
for infile in listing:
    print infile

# `infile` still holds the last entry returned by os.listdir(), so only that
# one file is opened and parsed. If the directory is empty, `infile` was never
# assigned and this line raises NameError.
file = open(os.path.join(path,infile),'r')

# Read the whole file into memory, then release the handle.
data = file.read()
file.close()

# Build a DOM tree from the XML text.
dom = parseString(data)

# For each tag of interest: take the first matching element, serialise it with
# toxml() (which includes the enclosing tags), then strip the literal opening
# and closing tag strings so only the inner content is left.
# [0] raises IndexError when the document has no such element, and the
# replace() calls only match an opening tag written without attributes.
xmlTag0 = dom.getElementsByTagName('Extrinsic')[0].toxml()
xmlData0 = xmlTag0.replace('<Extrinsic>','').replace('</Extrinsic>','')
xmlTag1 = dom.getElementsByTagName('DeliverTo')[0].toxml()
xmlData1 = xmlTag1.replace('<DeliverTo>','').replace('</DeliverTo>','')
xmlTag2 = dom.getElementsByTagName('Street1')[0].toxml()
xmlData2 = xmlTag2.replace('<Street1>','').replace('</Street1>','')
xmlTag3 = dom.getElementsByTagName('City')[0].toxml()
xmlData3 = xmlTag3.replace('<City>','').replace('</City>','')
xmlTag4 = dom.getElementsByTagName('State')[0].toxml()
xmlData4 = xmlTag4.replace('<State>','').replace('</State>','')
xmlTag5 = dom.getElementsByTagName('PostalCode')[0].toxml()
xmlData5 = xmlTag5.replace('<PostalCode>','').replace('</PostalCode>','')


import cherrypy
class Root(object):
    """Request handlers for the order page and the quit link."""

    def index(self):
        """Return the page fragments built from the module-level xmlData values."""
        quit_link = """<br><br><a href="/exit">Quit</a>"""
        order_line = ('Order Number:', ' ', xmlData0)
        name_line = ('<br>Name: ', xmlData1)
        street_line = ('<br>Street   Address: ', xmlData2)
        locality_line = (
            '<br>City/State/Zip: ', xmlData3, ', ', xmlData4, ' ', xmlData5,
        )
        # Concatenating the tuples yields the same flat tuple of fragments.
        return (
            order_line + name_line + street_line + locality_line
            + (' ', quit_link)
        )
    index.exposed = True

    def exit(self):
        """Stop the process by raising SystemExit with status 0."""
        raise SystemExit(0)
    exit.exposed = True

def start():
    """Mount Root at '/', start the CherryPy engine and block.

    webbrowser.open and the local URL are handed to the engine's
    start_with_callback as the callable and its argument tuple.
    """
    import webbrowser

    cherrypy.tree.mount(Root(), '/')

    callback_args = ('http://localhost:8080/',)
    engine = cherrypy.engine
    engine.start_with_callback(webbrowser.open, callback_args)
    engine.block()

# Launch the server only when this file is executed as a script.
if __name__ == '__main__':
    start()

EDIT: Updated with my solution below.


Solution

  • In order to pull data from every file in the directory I used this code below:

    from xml.dom.minidom import parse, parseString
    import os, glob, re
    import cherrypy
    class Root(object):
        """CherryPy handlers that list order details from every XML file.

        The directory is re-scanned on each request, so files that were added
        or removed since the previous request are reflected in the page.
        """

        # Directory scanned for *.xml files. Kept as a class attribute rather
        # than a handler parameter so it cannot be set from the query string.
        # Raw string: the backslashes are literal by construction instead of
        # relying on '\V' and '\X' not being escape sequences.
        xml_dir = r'C:\Vestigo\XML'

        # Tags read from each file, in output order. 'City' is included so the
        # listing carries the same City/State/Zip data as the single-file page.
        order_tags = (
            'Extrinsic', 'DeliverTo', 'Street1', 'City', 'State', 'PostalCode',
        )

        @staticmethod
        def _first_text(xmldoc, tag):
            """Return the text of the first <tag> element, or '' if unavailable.

            A missing element, an empty element, or a first child that is not
            a text node yields '' instead of raising, so one incomplete file
            does not break the whole page.
            """
            matches = xmldoc.getElementsByTagName(tag)
            if not matches:
                return ''
            return getattr(matches[0].firstChild, 'data', '')

        def index(self):
            """Return the page: one line per XML file, followed by a quit link.

            Returns:
                A 2-tuple ``(listing_html, quit_link_html)``. ``listing_html``
                holds one space-separated row per file joined with ``<br>``;
                it is the empty string when the directory has no XML files.
            """
            # Function-scope import, as start() does for webbrowser.
            from xml.sax.saxutils import escape

            # Glob once; sort so the row order is stable between requests.
            xml_files = sorted(glob.glob(os.path.join(self.xml_dir, '*.xml')))
            print(len(xml_files))

            rows = []
            for xml_file in xml_files:
                xmldoc = parse(xml_file)
                row = ' '.join(
                    self._first_text(xmldoc, tag) for tag in self.order_tags
                )
                print(row)
                # The values are plain text from the XML; escape them before
                # they are embedded in the HTML response.
                rows.append(escape(row))

            # Accumulate every row; previously each iteration overwrote the
            # output, so only the last file was ever shown.
            return ('<br>'.join(rows), """<br><br><a href="/exit">Quit</a>""")
        index.exposed = True

        def exit(self):
            """Stop the process by raising SystemExit with status 0."""
            raise SystemExit(0)
        exit.exposed = True
    
    def start():
        """Mount Root at '/', start the CherryPy engine and block.

        The engine is given webbrowser.open plus the local URL through
        start_with_callback.
        """
        import webbrowser

        cherrypy.tree.mount(Root(), '/')

        landing_page = 'http://localhost:8080/'
        bus = cherrypy.engine
        bus.start_with_callback(webbrowser.open, (landing_page,))
        bus.block()
    
    # Start the server only when the file is run directly, not when imported.
    if __name__ == '__main__':
        start()
    

    Thanks for your help everyone, and for the tip on answering my own question Sheena!