Search code examples
python xml indentation yattag

Indentation error when generating xml file using python script


I'm attempting to create an XML file with a python script by reading an excel sheet. Using yattag I'm able to accomplish this although not quite how I need the formatting. I've pasted the code below and have already verified there has been no mixing of spaces/tabs.

The goal is to wrap the entire item in the 'node' tag, with the two 'category' tags nested inside it as subcategories. I'm getting the error because, after the 'node' tag, I have 2 tabs before the 'location' tag. If I fix the error, I get the first block below (the XML output), where '</node>' closes too early; the second block is my script. Basically I just need to move the '</node>' down to the bottom, if that makes any sense.

<node type="document" action="create">
        <location>TempCD</location>
        <title>doc1</title>
        <file>E:\Doc1.docx</file>
        <mime>application</mime>
    </node>
    <category name="Content">
        <attribute name="Function">asd</attribute>
        <attribute name="Commodity">sf</attribute>
        <attribute name="Sub-Commodity">qw</attribute>
        <attribute name="Contract/Document Owner">e</attribute>
        <subitems>reapply</subitems>
    </category>
    <category name="Content Server Categories:LYB:LYB-GSC-Contracts">
        <attribute name="Supplier">Altom Transport</attribute>
        <attribute name="Pricing Terms">Fixed</attribute>
        <attribute name="Term Type">Fixed</attribute>
        <subitems name="Commodity">reapply</subitems>
    </category>
     from openpyxl import load_workbook
        from yattag import Doc, indent
        
        # Script as posted in the question (indentation kept exactly as posted):
        # reads the first worksheet of input_sample.xlsx and, for every row,
        # builds one <node> and two <category> elements with yattag.
        wb = load_workbook("input_sample.xlsx")
        ws = wb.worksheets[0]
        
        # Create Yattag doc, tag and text objects
        doc, tag, text = Doc().tagtext()
        
        xml_header = '<?xml version="1.0" encoding="UTF-8"?>'
        xml_schema = '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"></xs:schema>'
        
        # Put the declaration and the schema element at the start of the document.
        doc.asis(xml_header)
        doc.asis(xml_schema)
        
        # One iteration per worksheet row, starting at row 2.
        for row in ws.iter_rows(min_row=2):
            # Swap the cell objects for their values so columns can be read by index.
            row = [cell.value for cell in row]
            # NOTE: the four child tags below are indented two levels deeper than
            # this `with`, while the `category` blocks after them are indented
            # only one level deeper.
            with tag('node', type=row[0], action=row[1]):
                    with tag("location"): text(row[2])
                    with tag("title"): text(row[3])
                    with tag("file"): text(row[4])
                    with tag("mime"): text(row[5])
                # The IndentationError is reported on the next line: it dedents to
                # a level that matches no enclosing block (deeper than the
                # `with tag('node')` line, shallower than that block's body).
                with tag('category', name=row[6]):
                    with tag("attribute", name='Function'): text(row[7])
                    with tag("attribute", name='Commodity'): text(row[8])
                    with tag("attribute", name='Sub-Commodity'): text(row[9])
                    with tag("attribute", name='Contract/Document Owner'): text(row[10])
                    with tag("subitems"): text("reapply")
                with tag('category', name=row[11]):
                    with tag("attribute", name='Supplier'): text(row[12])
                    with tag("attribute", name='Pricing Terms'): text(row[13])
                    with tag("attribute", name='Term Type'): text(row[14])
                    with tag("subitems"): text("reapply")
        
        # Re-indent the generated markup using four spaces per nesting level.
        result = indent(
            doc.getvalue(),
            indentation = '    ',
            indent_text = False
        )
        
        # Write the formatted XML to test_resulted.xml.
        with open("test_resulted.xml", "w") as f:
            f.write(result)

Solution

  • This should give you the xml you're looking for:

    from openpyxl import load_workbook
    from yattag import Doc, indent
    
    # Read the first worksheet of the input workbook; each row from row 2 on
    # becomes one <node> element in the generated XML file.
    wb = load_workbook("input_sample.xlsx")
    ws = wb.worksheets[0]

    # Create Yattag doc, tag and text objects
    doc, tag, text = Doc().tagtext()

    # Only the XML declaration is written verbatim. The bare <xs:schema>
    # element from the question is not emitted: next to the data it would be a
    # second top-level element, and a well-formed XML document has exactly one.
    doc.asis('<?xml version="1.0" encoding="UTF-8"?>')

    # A single wrapping element is required so the per-row <node> elements
    # are siblings under one root instead of several top-level elements.
    with tag('root'):
        for cells in ws.iter_rows(min_row=2):
            row = [cell.value for cell in cells]
            # Everything for one row is nested one level inside this block, so
            # both <category> elements are children of <node> and </node> is
            # written only after them.
            with tag('node', type=row[0], action=row[1]):
                with tag("location"): text(row[2])
                with tag("title"): text(row[3])
                with tag("file"): text(row[4])
                with tag("mime"): text(row[5])
                with tag('category', name=row[6]):
                    with tag("attribute", name='Function'): text(row[7])
                    with tag("attribute", name='Commodity'): text(row[8])
                    with tag("attribute", name='Sub-Commodity'): text(row[9])
                    with tag("attribute", name='Contract/Document Owner'): text(row[10])
                    with tag("subitems"): text("reapply")
                with tag('category', name=row[11]):
                    with tag("attribute", name='Supplier'): text(row[12])
                    with tag("attribute", name='Pricing Terms'): text(row[13])
                    with tag("attribute", name='Term Type'): text(row[14])
                    with tag("subitems"): text("reapply")

    # Re-indent the generated markup using four spaces per nesting level.
    result = indent(
        doc.getvalue(),
        indentation='    ',
        indent_text=False,
    )

    # Write with an explicit UTF-8 encoding so the bytes on disk match the
    # encoding named in the XML declaration, whatever the platform default is.
    with open("test_resulted.xml", "w", encoding="utf-8") as f:
        f.write(result)
    

    Output

    <?xml version="1.0" encoding="UTF-8"?>
    <root>
        <node type="2" action="2">
            <location>2</location>
            <title>2</title>
            <file>2</file>
            <mime>2</mime>
            <category name="2">
                <attribute name="Function">2</attribute>
                <attribute name="Commodity">2</attribute>
                <attribute name="Sub-Commodity">2</attribute>
                <attribute name="Contract/Document Owner">2</attribute>
                <subitems>reapply</subitems>
            </category>
            <category name="2">
                <attribute name="Supplier">2</attribute>
                <attribute name="Pricing Terms">2</attribute>
                <attribute name="Term Type">2</attribute>
                <subitems>reapply</subitems>
            </category>
        </node>
        <node>
           ..........
        </node>
        ..............
    </root>