Search code examples
xml xslt xslt-1.0 xslt-2.0 xslt-grouping

Eliminate duplicate within nodes and copy complete XML structure in XSLT


I have a sample message in which I need to remove duplicate rows within each party block.

The sample message is

<!-- Sample input: two party elements, each with one pos holding a
     temperatureInformation group of four row elements. -->
<document>
<body>
    <party>
        <pos>
            <attrGroupMany name="temperatureInformation">
                <row>
                    <gtin>1000909090</gtin>
                    <attr name="temperatureCode">STORAGE</attr>
                    <attrQualMany name="temperature">
                        <value qual="FAH">10</value>
                        <value qual="CC">20</value>
                    </attrQualMany>
                </row>
                <!-- Same temperatureCode and temperature values as the row above,
                     but without a gtin child. -->
                <row>
                    <attr name="temperatureCode">STORAGE</attr>
                    <attrQualMany name="temperature">
                        <value qual="FAH">10</value>
                        <value qual="CC">20</value>
                    </attrQualMany>
                </row>
                <row>
                    <attr name="temperatureCode">HANDLING</attr>
                    <attrQualMany name="temperature">
                        <value qual="FAH">10</value>                    
                    </attrQualMany>
                </row>
                <!-- Same temperatureCode as the row above, but with no
                     temperature attrQualMany at all. -->
                <row>
                    <attr name="temperatureCode">HANDLING</attr>

                </row>
            </attrGroupMany>
        </pos>
    </party>    
    <party>
        <pos>
            <attrGroupMany name="temperatureInformation">
                <row>
                    <gtin>1000909091</gtin>
                    <attr name="temperatureCode">STORAGE1</attr>
                    <attrQualMany name="temperature">
                        <value qual="FAH">10</value>
                        <value qual="CC">20</value>
                    </attrQualMany>
                </row>
                <!-- Same temperatureCode and temperature values as the row above,
                     but without a gtin child. -->
                <row>
                    <attr name="temperatureCode">STORAGE1</attr>
                    <attrQualMany name="temperature">
                        <value qual="FAH">10</value>
                        <value qual="CC">20</value>
                    </attrQualMany>
                </row>
                <!-- The two HANDLING rows below have the same content as the two
                     HANDLING rows of the first party. -->
                <row>
                    <attr name="temperatureCode">HANDLING</attr>
                    <attrQualMany name="temperature">
                        <value qual="FAH">10</value>                    
                    </attrQualMany>
                </row>
                <row>
                    <attr name="temperatureCode">HANDLING</attr>
                </row>
            </attrGroupMany>
        </pos>

    </party>
</body>
</document>

The expected output is

<!-- Expected result: each party keeps three row elements. The second
     STORAGE / STORAGE1 row of the input is gone; both HANDLING rows remain. -->
<document>
<body>
    <party>
        <pos>
            <attrGroupMany name="temperatureInformation">
                <row>
                    <gtin>1000909090</gtin>
                    <attr name="temperatureCode">STORAGE</attr>
                    <attrQualMany name="temperature">
                        <value qual="FAH">10</value>
                        <value qual="CC">20</value>
                    </attrQualMany>
                </row>
                <row>
                    <attr name="temperatureCode">HANDLING</attr>
                    <attrQualMany name="temperature">
                        <value qual="FAH">10</value>                    
                    </attrQualMany>
                </row>
                <row>
                    <attr name="temperatureCode">HANDLING</attr>                        
                </row>
            </attrGroupMany>
        </pos>
    </party>    
    <party>
        <pos>
            <attrGroupMany name="temperatureInformation">
                <row>
                    <gtin>1000909091</gtin>
                    <attr name="temperatureCode">STORAGE1</attr>
                    <attrQualMany name="temperature">
                        <value qual="FAH">10</value>
                        <value qual="CC">20</value>
                    </attrQualMany>
                </row>
                <row>
                    <attr name="temperatureCode">HANDLING</attr>
                    <attrQualMany name="temperature">
                        <value qual="FAH">10</value>                    
                    </attrQualMany>
                </row>
                <row>
                    <attr name="temperatureCode">HANDLING</attr>
                </row>
            </attrGroupMany>
        </pos>

    </party>
</body>
</document>

As you can see, the duplicate rows with temperatureCode STORAGE and STORAGE1 are removed.

I am using the XSLT below. It removes duplicates only from the first party node and does not process the second party node completely. The XSLT I am using is:

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">

<xsl:output indent="yes"/>

<!--
  Rebuilds body, every party and every pos by hand with xsl:element, then
  applies templates only to the selected row elements.
  Nothing else inside party or pos is processed, and the attrGroupMany
  wrapper is not re-created, so the selected rows are written directly
  under pos.
-->
<xsl:template match="/document/body"> 
    <xsl:element name="body">

        <xsl:for-each select="party"> 
            <xsl:element name="party">

                <xsl:for-each select="pos">

                    <xsl:element name="pos">

                        <xsl:if test="attrGroupMany[@name = 'temperatureInformation']">
                            <!-- Keep a row only when it is the first node (in document order)
                                 that the 'group' key returns for its temperatureCode/temperature
                                 value; generate-id() of a node-set uses its first node. -->
                            <xsl:apply-templates select="attrGroupMany[@name = 'temperatureInformation']/row[generate-id() = generate-id(key('group', concat(attr[@name = 'temperatureCode'], '|', attrQualMany[@name = 'temperature'])))]"/>
                        </xsl:if>
                    </xsl:element>

                </xsl:for-each>

            </xsl:element>

        </xsl:for-each> 
    </xsl:element>
</xsl:template>

<!--
  Index of temperatureInformation rows. The key value is built only from the
  temperatureCode attr and the string value of the temperature attrQualMany;
  it carries no party or pos identity. key() looks across the whole document,
  so rows from different party elements with equal values share one group and
  only the first of them in the document passes the generate-id() test above.
-->
<xsl:key name="group" match="attrGroupMany[@name = 'temperatureInformation']/row"
    use="concat(attr[@name = 'temperatureCode'], '|', attrQualMany[@name = 'temperature'])"/>



<!-- Identity template: copies every attribute and node that no more
     specific template matches, then recurses into it. -->
<xsl:template match="@* | node()">
    <xsl:copy>
        <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
</xsl:template>




</xsl:stylesheet>

Please let me know where I am going wrong in the XSLT.


Solution

  • Adapting the code from https://stackoverflow.com/a/38265314/252228, you can try:

    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        version="1.0">

        <!--
          Muenchian grouping key for temperatureInformation rows.

          The key value has three '|'-separated parts:
            1. generate-id() of the enclosing pos, which limits duplicate
               detection to rows that belong to the same pos;
            2. the temperatureCode attr;
            3. the whitespace-normalized string value of the temperature
               attrQualMany, so that indentation or line-break differences
               between otherwise identical rows do not hide a duplicate.

          The match pattern is the same one used by the attrGroupMany template
          below, so every row that template looks at is present in the index.
          Only text content takes part in the comparison; the qual attributes
          of the value elements are not part of the key.
        -->
        <xsl:key name="group" match="attrGroupMany[@name = 'temperatureInformation']/row"
            use="concat(generate-id(ancestor::pos), '|', attr[@name = 'temperatureCode'], '|', normalize-space(attrQualMany[@name = 'temperature']))"/>

        <!-- Identity template: copies everything that has no more specific
             template, preserving the rest of the document structure. -->
        <xsl:template match="@* | node()">
            <xsl:copy>
                <xsl:apply-templates select="@* | node()"/>
            </xsl:copy>
        </xsl:template>

        <!--
          Copies the temperatureInformation group with its attributes and, of
          its row children, only those that are the first row (in document
          order) of their key group. Later rows with the same key are dropped.
          The key expression here must stay identical to the "use" expression
          of the xsl:key above.
        -->
        <xsl:template match="attrGroupMany[@name = 'temperatureInformation']">
            <xsl:copy>
                <xsl:apply-templates select="@*"/>
                <xsl:apply-templates select="row[generate-id() = generate-id(key('group', concat(generate-id(ancestor::pos), '|', attr[@name = 'temperatureCode'], '|', normalize-space(attrQualMany[@name = 'temperature'])))[1])]"/>
            </xsl:copy>
        </xsl:template>

    </xsl:stylesheet>
    

    As you seem to want to ignore the <gtin>1000909090</gtin> when checking for "duplicates", note that the above stylesheet simply copies the first identified "duplicate" based on the attr[@name = 'temperatureCode'] and attrQualMany[@name = 'temperature'] inside of a single pos; if there are further rows with other gtin child elements or additional child elements, these will not be copied to the result.