Search code examples
Tags: python, xml, xslt, saxon-c

saxonc's transform_to_file(), executed in a loop, doesn't transform but gives nonsensical errors or partial output


My transformation stylesheet file contains:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Turns every <country> found under a root <cities> element into a <city> element. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
    <!-- Serialize the result as indented XML encoded in UTF-8. -->
    <xsl:output method="xml" encoding="UTF-8" indent="yes"/>
    <!-- Single template on the document node; it builds the whole result tree. -->
    <xsl:template match="/">
        <cities>
            <!-- One <city> per cities/country: the country's capital attribute
                 becomes the city's name, and isCapital is always "true". -->
            <xsl:for-each select="cities/country">
                <city name="{@capital}" isCapital="true"/>
            </xsl:for-each>
        </cities>
    </xsl:template>
</xsl:stylesheet>

My Python code:

import os
import xml.etree.ElementTree as ET
from saxonpy import PySaxonProcessor

def main() -> None:
    """Reproduce the failing per-country transformation loop.

    Parses an inline XML document and, for every direct child of its root
    element, wraps that child in a fresh ``<cities>`` element, writes the
    resulting tree to ``tmp.xml`` and calls ``transform_to_file()`` on a
    processor obtained from ``new_xslt30_processor()`` to turn it into
    ``output_<n>.xml`` using ``transformer.xsl``.

    Any exception raised inside the ``try`` block is printed, after which the
    loop carries on with the next child.
    """
    print('starting code...')
    source_XML = '''
        <data>
            <country name="Denmark" capital="Copenhagen"/>
            <country name="Germany" capital="Berlin"/>
            <country name="France" capital="Paris"/>
        </data>
    '''
    parentroot = ET.fromstring(source_XML)
    # Materialise the root's direct children (the three <country> elements).
    children = list(parentroot)

    # Write each country to its own single-entry XML file and transform it.
    cnt = 0
    for child in children:
        cnt = cnt + 1  # 1-based counter used in the output file name
        childroot = ET.Element("cities")
        childroot.append(child)
        tempfile_tree = ET.ElementTree(childroot)
   
        # Alternative temp-file locations that were also tried:
        # tempfile = "C:\\pythonProject\\stackoverflow\\tmp.xml"
        # tempfile = "C:\\gaga\\tmp.xml"
        # tempfile = os.path.abspath("tmp.xml")
        tempfile = "tmp.xml"

        transformedfile = f"output_{cnt}.xml"
        # The same temp file is overwritten on every iteration.
        with open(tempfile, 'wb') as f:
            tempfile_tree.write(f, encoding='utf-8', xml_declaration=True)

        try:
            # The processor is created inside the loop, so every iteration
            # gets its own instance and leaves its `with` block before the
            # next iteration starts.
            with PySaxonProcessor(license=False) as proc:
                # Presumably makes Saxon resolve the relative file names
                # below against the current directory - TODO confirm.
                proc.set_cwd(os.getcwd())
                xsltproc = proc.new_xslt30_processor()
                xsltproc.transform_to_file(source_file=tempfile,
                                           stylesheet_file="transformer.xsl",
                                           output_file=transformedfile)
                # Printed whenever transform_to_file() returns without raising.
                print(f"{transformedfile} has been created.")
        except Exception as e:
            # Report the failure and continue with the next country.
            print(e)

# Run the reproduction only when this file is executed as a script.
if __name__ == "__main__":
    main()

my problem

I have saxonpy imported and I'm trying to run saxonc's transform_to_file() inside a loop, but I'm unable to get the transformed output files. Depending on what I set for tempfile, I get

        #   I/O error reported by XML parser processing C:\pythonProject\stackoverflow\░╚╒E ⌂:
        #   unknown protocol: c. Caused by java.net.MalformedURLException: unknown protocol: c

or

Content is not allowed in prolog
(This is definitely not the case; I checked the temp file with a hex editor.)

or no error but output file containing only:

        # <?xml version="1.0" encoding="UTF-8"?>
        # <cities/>

I have also seen correct output, but I can no longer reproduce that.

Note: I disabled the MAX_PATH limit when installing Python 3.10. Note: I'm using PyCharm with a Poetry venv.


Solution

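  • With apply_templates_returning_file() it works. Note that the PySaxonProcessor is created, and the stylesheet compiled, only once, outside the loop: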

    import os
    import xml.etree.ElementTree as ET
    from saxonpy import PySaxonProcessor
    
    def main():
        """Transform each country of the sample document into its own file.

        A single PySaxonProcessor is opened and ``transformer.xsl`` is compiled
        once via ``compile_stylesheet()``. Each direct child of the parsed root
        element is then wrapped in a new ``<cities>`` element, written to a
        scratch file, and passed to ``apply_templates_returning_file()``, which
        is asked to write ``output_<n>.xml`` (``n`` counts from 1).

        Any exception raised inside the ``try`` block is printed and ends the
        run.
        """
        print('starting code...')
        xml_text = '''
            <data>
                <country name="Denmark" capital="Copenhagen"/>
                <country name="Germany" capital="Berlin"/>
                <country name="France" capital="Paris"/>
            </data>
        '''
        countries = list(ET.fromstring(xml_text))

        try:
            with PySaxonProcessor(license=False) as saxon:
                saxon.set_cwd(os.getcwd())
                # Keep the processor bound to a name while the compiled
                # stylesheet is in use.
                xslt_processor = saxon.new_xslt30_processor()
                executable = xslt_processor.compile_stylesheet(
                    stylesheet_file="transformer.xsl")

                # One scratch file, overwritten for every country.
                scratch_path = "tmp_1234567890ABCDEFGHIJKLMNOP.xml"

                for index, country in enumerate(children := countries, start=1) if False else enumerate(countries, start=1):
                    wrapper = ET.Element("cities")
                    wrapper.append(country)
                    result_path = f"output_{index}.xml"

                    with open(scratch_path, 'wb') as handle:
                        ET.ElementTree(wrapper).write(handle, xml_declaration=True)

                    executable.apply_templates_returning_file(
                        source_file=scratch_path,
                        output_file=result_path)
                    print(f"{result_path} has been created.")

        except Exception as error:
            print(error)

    if __name__ == "__main__":
        main()