Search code examples
Tags: xml, xslt, ocr, xslt-2.0, abbyy

Translating complex XML to simplified XML using XSL


I have written an XSL file to transform src.xml into target.xml, but I am not getting the expected output.

Here is the source XML (src.xml):

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<document xmlns="http://www.abbyy.com/FineReader_xml/FineReader10-schema-v1.xml" version="1.0" producer="ABBYY FineReader Engine 12" languages="" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.abbyy.com/FineReader_xml/FineReader10-schema-v1.xml http://www.abbyy.com/FineReader_xml/FineReader10-schema-v1.xml">
<page width="294" height="189" resolution="120" originalCoords="1">
<block blockType="Text" blockName="" l="0" t="5" r="272" b="185"><region><rect l="0" t="5" r="272" b="185"/></region>
<text>
<par lineSpacing="2410">
<line baseline="30" l="1" t="6" r="72" b="30"><formatting lang="EnglishUnitedStates">hello</formatting></line></par>
<par lineSpacing="1840">
<line baseline="87" l="0" t="69" r="179" b="87"><formatting lang="EnglishUnitedStates">this is a website</formatting></line></par>
<par lineSpacing="1260">
<line baseline="136" l="0" t="122" r="269" b="140"><formatting lang="EnglishUnitedStates">Is the writing getting smaller?</formatting></line></par>
<par lineSpacing="1260">
<line baseline="182" l="0" t="169" r="133" b="182"><formatting lang="EnglishUnitedStates">IM SHRINKING</formatting></line></par>
</text>
</block>
</page>
</document>

This is the XSL stylesheet that is supposed to convert the complex XML into simplified XML:

 <?xml version="1.0" encoding="UTF-8"?>
    <xsl:stylesheet version="1.0"  xmlns="http://www.abbyy.com/FineReader_xml/FineReader10-schema-v1.xml"   
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <!--
        Purpose: rebuild the source as a simplified
        document/page/block/text/paragraph/line structure.

        NOTE: the default namespace declared on xsl:stylesheet applies only to
        the literal result elements below (document, page, block, ...). In
        XSLT 1.0 an unprefixed name inside an XPath expression always means
        "no namespace", so paths such as /document/page/block do not match the
        source elements, which are in the FineReader namespace. The xsi prefix
        is not used by this stylesheet, but it is in scope on the literal
        result elements and is therefore copied to the output as well.
    -->
    <xsl:output method="xml" indent="yes"/>
        <!-- Single template: builds the whole result tree from the document root. -->
        <xsl:template match="/">
            <document>
                <page>
                    <block>
                        <!-- Selects nothing for the reason given above, so $blockType is empty. -->
                        <xsl:variable name="blockType" select="/document/page/block/@blockType"/>
                        <!-- The variable blockType can be used for further processing.  -->
                        
                        
                        <xsl:attribute name="blockType"><xsl:value-of select="$blockType"/></xsl:attribute>
                        <text>
                            <!-- Intended to emit one paragraph per source par; the path matches
                                 no nodes, so the loop body never runs. -->
                            <xsl:for-each select="/document/page/block/text/par">
                                <paragraph>
                                    <line>
                                        <xsl:value-of   select="./line"/>
                                    </line>
                                </paragraph>
                            </xsl:for-each>
                        </text>
                    </block>
                </page>
            </document>
        </xsl:template>
    </xsl:stylesheet>

Actual Output

<?xml version="1.0" encoding="UTF-8"?><document xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.abbyy.com/FineReader_xml/FineReader10-schema-v1.xml">
    <page>
        <block blockType="">
            <text/>
        </block>
    </page>
</document>

Expected output:

<?xml version="1.0" encoding="UTF-8"?>
<document>
    <page>
        <block blockType="Text">
            <text>
                <paragraph>
                    <line>hello</line>
                </paragraph>
                <paragraph>
                    <line>this is a website</line>
                </paragraph>
                <paragraph>
                    <line>Is the writing getting smaller?</line>
                </paragraph>
                <paragraph>
                    <line>IM SHRINKING</line>
                </paragraph>
            </text>
        </block>
    </page>
</document>

Solution

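  • The source elements are in the ABBYY default namespace, but unprefixed names in your XPath expressions refer to no namespace, so nothing is selected. In XSLT 2.0 you can set the xpath-default-namespace attribute so that unprefixed names in XPath match that namespace; also remove the default xmlns declaration from the stylesheet so the result elements are created in no namespace: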

        <?xml version="1.0" encoding="UTF-8"?>
    <xsl:stylesheet version="2.0"
        xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xpath-default-namespace="http://www.abbyy.com/FineReader_xml/FineReader10-schema-v1.xml">
        <!--
            Simplifies an ABBYY FineReader XML document into a plain
            document/page/block/text/paragraph/line structure whose elements
            are in no namespace.

            xpath-default-namespace (XSLT 2.0) makes the unprefixed element
            names in the XPath expressions below match the source elements,
            which are all in the FineReader default namespace. It does not
            affect attribute names or the literal result elements.

            No default namespace and no unused prefix (such as xsi) is declared
            on this stylesheet: every namespace in scope on a literal result
            element is copied to the output, and the expected result carries
            no namespace declarations at all.
        -->
        <xsl:output method="xml" indent="yes"/>

        <!-- Single entry point: builds the whole simplified tree from the document root. -->
        <xsl:template match="/">
            <document>
                <page>
                    <block>
                        <!-- The variable blockType can be used for further processing. -->
                        <xsl:variable name="blockType" select="/document/page/block/@blockType"/>
                        <xsl:attribute name="blockType" select="$blockType"/>
                        <text>
                            <!-- One paragraph element per source par, in document order. -->
                            <xsl:for-each select="/document/page/block/text/par">
                                <paragraph>
                                    <line>
                                        <!-- String value of the par's line children; in XSLT 2.0
                                             several lines are joined with a single space. -->
                                        <xsl:value-of select="line"/>
                                    </line>
                                </paragraph>
                            </xsl:for-each>
                        </text>
                    </block>
                </page>
            </document>
        </xsl:template>
    </xsl:stylesheet>
    

    check: http://xsltfiddle.liberty-development.net/gWmuiHX/30