Search code examples
xslt-2.0 xslt-3.0

XSLT to remove duplicate segments when they have the same child elements


I am trying to write an XSLT stylesheet that deletes duplicate ITEM segments when they have the same child node values. In the example below each ITEM has 6 fields, but a few more empty tags or fields with values might arrive in the future. How can I express that in XSLT in a generic way? Can the preceding-sibling axis be used to look for identical ITEM segments under the BXYI segment and remove them, or do I need to list all the possible fields? Please also check my XSLT code, which is not removing the duplicate ITEM segments in the first place.

Input sample

<?xml version="1.0" encoding="UTF-8"?>
<D02X001>
    <DOC BEGIN="1">
        <DC40 SEGMENT="1">
            <NAM>DC40</NAM>     
        </DC40>
        <BXYH SEGMENT="1">
            <LDAT>date</LDAT>
            <UDAT>date1</UDAT>          
            <BXYI SEGMENT="1">
                <TNR>123453</TNR>           
                <ORT>1000</ORT>
                
                <ITEM SEGMENT="1">
                    <N11>6789</N11>
                    <AR>03</AR>
                    <PQC>NU</PQC>
                    <QTY>90909</QTY>
                    <NUM/>
                    <ASCD/>
                </ITEM>
                <ITEM SEGMENT="1">
                    <N11>6789</N11>
                    <AR>03</AR>
                    <PQC>NU</PQC>
                    <QTY>3456</QTY>
                    <NUM/>
                    <ASCD/>
                </ITEM>
                <ITEM SEGMENT="1">
                    <N11>6789</N11>
                    <AR>03</AR>
                    <PQC>NU</PQC>
                    <QTY>3456</QTY>
                    <NUM/>
                    <ASCD/>
                </ITEM>
                <ITEM SEGMENT="1">
                    <N11>6789</N11>
                    <AR>03</AR>
                    <PQC>NU</PQC>
                    <QTY>3456</QTY>
                    <NUM/>
                    <ASCD/>
                </ITEM>
            </BXYI>
        </BXYH>
    </DOC>
</D02X001>

output sample

<?xml version="1.0" encoding="UTF-8"?>
<D02X001>
    <DOC BEGIN="1">
        <DC40 SEGMENT="1">
            <NAM>DC40</NAM>     
        </DC40>
        <BXYH SEGMENT="1">
            <LDAT>date</LDAT>
            <UDAT>date1</UDAT>          
            <BXYI SEGMENT="1">
                <TNR>123453</TNR>           
                <ORT>1000</ORT>
                
                <ITEM SEGMENT="1">
                    <N11>6789</N11>
                    <AR>03</AR>
                    <PQC>NU</PQC>
                    <QTY>90909</QTY>
                    <NUM/>
                    <ASCD/>
                </ITEM>
                <ITEM SEGMENT="1">
                    <N11>6789</N11>
                    <AR>03</AR>
                    <PQC>NU</PQC>
                    <QTY>3456</QTY>
                    <NUM/>
                    <ASCD/>
                </ITEM>     
            </BXYI>
        </BXYH>
    </DOC>
</D02X001>

XSLT I used

<?xml version="1.0" encoding="UTF-8"?>
<!-- Asker's XSLT 1.0 attempt at removing duplicate ITEM elements with a key
     (Muenchian grouping). As written it reproduces the input unchanged; the
     comments below point out why. -->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <!-- Indexes every ITEM by four of its fields joined with '-'. Other
         children (NUM, ASCD, or any field added later) are not part of the
         key, so this is not generic. -->
    <xsl:key name="uniqueItems" match="ITEM" use="concat(N11, '-', AR, '-', PQC, '-', QTY)"/>
    <xsl:output method="xml" indent="yes"/>
    <!-- Root rule: deep-copies the whole document. Because it uses copy-of
         rather than apply-templates, no other template rule is ever applied,
         so the BXYH rule below never fires and duplicates are kept. -->
    <xsl:template match="/">
        <xsl:copy-of select="."/>
    </xsl:template>
    <!-- Intended de-duplication rule for BXYH (unreachable, see above). -->
    <xsl:template match="BXYH">
        <xsl:copy>
            <xsl:apply-templates select="*"/>
            <!-- Literal BXYI wrapper, emitted in addition to whatever the
                 apply-templates call above produces for the real BXYI. -->
            <BXYI SEGMENT="1">
                <!-- The context node is BXYH, so select="ITEM" looks for ITEM
                     children of BXYH. In the sample input ITEM is a child of
                     BXYI, so this loop selects nothing. -->
                <xsl:for-each select="ITEM">
                    <!-- Keep an ITEM only if it is the first node in its key group. -->
                    <xsl:if test="generate-id() = generate-id(key('uniqueItems', concat(N11, '-', AR, '-', PQC, '-', QTY))[1])">
                        <xsl:copy-of select="."/>
                    </xsl:if>
                </xsl:for-each>
            </BXYI>
        </xsl:copy>
    </xsl:template>
</xsl:stylesheet>

Solution

  • In XSLT 3 you can do that with a composite grouping key, e.g.:

      <!-- Matches any element that has at least one ITEM child carrying a
           SEGMENT attribute. Copies the element, processes its attributes and
           non-ITEM children first, then emits one ITEM per distinct group.
           Note that the unique ITEMs therefore come after all other children. -->
      <xsl:template match="*[ITEM/@SEGMENT]">
        <xsl:copy>
          <xsl:apply-templates select="@*, * except ITEM"/>
          <!-- Composite key: the SEGMENT attribute followed by a (name, value)
               pair for every child element. Including the element name keeps
               ITEMs apart when differently named children happen to hold the
               same values in the same positions (e.g. an extra empty NUM in
               one ITEM versus an extra empty ASCD in another). No field names
               are hard-coded, so newly added fields are covered automatically.
               Attributes of the child elements are not part of the key. -->
          <xsl:for-each-group select="ITEM" composite="yes"
                              group-by="@SEGMENT, * ! (local-name(), string())">
            <!-- The context item is the first ITEM of the current group. -->
            <xsl:sequence select="."/>
          </xsl:for-each-group>
        </xsl:copy>
      </xsl:template>
    

    Complete XSLT would be e.g.

    <?xml version="1.0" encoding="utf-8"?>
    <!-- XSLT 3.0 stylesheet: copies the input unchanged except that, inside
         any element with ITEM children, ITEMs whose SEGMENT attribute and
         child elements (names and values) are all equal are reduced to the
         first occurrence. -->
    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
      version="3.0"
      exclude-result-prefixes="#all">

      <xsl:output method="xml" indent="yes"/>

      <!-- Nodes without an explicit template rule are shallow-copied. -->
      <xsl:mode on-no-match="shallow-copy"/>

      <!-- Matches any element that has at least one ITEM child carrying a
           SEGMENT attribute. Attributes and non-ITEM children are processed
           first, then one ITEM per distinct group is emitted, so the unique
           ITEMs come after all other children. -->
      <xsl:template match="*[ITEM/@SEGMENT]">
        <xsl:copy>
          <xsl:apply-templates select="@*, * except ITEM"/>
          <!-- Composite key: the SEGMENT attribute followed by a (name, value)
               pair for every child element. Including the element name keeps
               ITEMs apart when differently named children happen to hold the
               same values in the same positions (e.g. an extra empty NUM in
               one ITEM versus an extra empty ASCD in another). No field names
               are hard-coded, so newly added fields are covered automatically.
               Attributes of the child elements are not part of the key. -->
          <xsl:for-each-group select="ITEM" composite="yes"
                              group-by="@SEGMENT, * ! (local-name(), string())">
            <!-- The context item is the first ITEM of the current group. -->
            <xsl:sequence select="."/>
          </xsl:for-each-group>
        </xsl:copy>
      </xsl:template>

    </xsl:stylesheet>
    

    Online fiddle.