Search code examples
Tags: xml, xslt, text, xslt-2.0

Remove duplicates considering all element values


Here is the sample XML:

<wd:Report_Data xmlns:wd="urn:com.report/RPT">
<!-- Sample report: each Report_Entry carries header fields (BU, JDATE, DESCR)
     and one or more LINE children. Two LINEs below are deliberate duplicates. -->
<wd:Report_Entry>
    <wd:BU>XYZ</wd:BU>
    <wd:JDATE>09-01-2018</wd:JDATE>
    <wd:DESCR>ZXCVBN</wd:DESCR>
    <wd:LINE>
        <wd:ACCOUNT>2962015</wd:ACCOUNT>
        <wd:MEMO>ABC</wd:MEMO>
        <wd:AMT>85.73</wd:AMT>
        <wd:CURRENCY>USD</wd:CURRENCY>
    </wd:LINE>
    <wd:LINE>
        <wd:ACCOUNT>2703270</wd:ACCOUNT>
        <wd:MEMO>DEF</wd:MEMO>
        <wd:AMT>-85.73</wd:AMT>
        <wd:CURRENCY>USD</wd:CURRENCY>
    </wd:LINE>
</wd:Report_Entry>
<wd:Report_Entry>
    <wd:BU>XYZ</wd:BU>
    <wd:JDATE>09-05-2018</wd:JDATE>
    <wd:DESCR>QWERTY</wd:DESCR>
    <wd:LINE>
        <wd:ACCOUNT>2703270</wd:ACCOUNT>
        <wd:MEMO>ABC</wd:MEMO>
        <wd:AMT>-2000</wd:AMT>
        <wd:CURRENCY>USD</wd:CURRENCY>
    </wd:LINE>
    <wd:LINE>
        <wd:ACCOUNT>2751015</wd:ACCOUNT>
        <wd:MEMO>DEF</wd:MEMO>
        <wd:AMT>2000</wd:AMT>
        <wd:CURRENCY>USD</wd:CURRENCY>
    </wd:LINE>
    <!-- Duplicate of the first LINE of this entry. -->
    <wd:LINE>
        <wd:ACCOUNT>2703270</wd:ACCOUNT>
        <wd:MEMO>ABC</wd:MEMO>
        <wd:AMT>-2000</wd:AMT>
        <wd:CURRENCY>USD</wd:CURRENCY>
    </wd:LINE>
</wd:Report_Entry>
<!-- Same header and LINE values as the second LINE of the first entry. -->
<wd:Report_Entry>
    <wd:BU>XYZ</wd:BU>
    <wd:JDATE>09-01-2018</wd:JDATE>
    <wd:DESCR>ZXCVBN</wd:DESCR>
    <wd:LINE>
        <wd:ACCOUNT>2703270</wd:ACCOUNT>
        <wd:MEMO>DEF</wd:MEMO>
        <wd:AMT>-85.73</wd:AMT>
        <wd:CURRENCY>USD</wd:CURRENCY>
    </wd:LINE>
</wd:Report_Entry>
</wd:Report_Data>

And my XSL is:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Flattens every wd:LINE of every wd:Report_Entry into one pipe-delimited
  text row (BU|JDATE|DESCR|ACCOUNT|MEMO|AMT|CURRENCY), each row terminated
  by a line feed. Rows are written in document order; no duplicate
  removal is performed.
-->
<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:wd="urn:com.report/RPT"
                xmlns:this="urn:this-stylesheet">

    <xsl:output method="text"/>

    <!-- Entry point: visit the lines of all entries in document order. -->
    <xsl:template match="wd:Report_Data">
        <xsl:apply-templates select="wd:Report_Entry/wd:LINE"/>
    </xsl:template>

    <!-- Writes one row per line; the first three fields come from the parent entry. -->
    <xsl:template match="wd:LINE">
        <xsl:variable name="entry" select=".."/>
        <xsl:value-of select="$entry/wd:BU"/>
        <xsl:text>|</xsl:text>
        <xsl:value-of select="$entry/wd:JDATE"/>
        <xsl:text>|</xsl:text>
        <!-- Strip '~', cap at 400 characters, then collapse whitespace. -->
        <xsl:value-of select="normalize-space(substring(replace($entry/wd:DESCR,'~',''),1,400))"/>
        <xsl:text>|</xsl:text>
        <xsl:value-of select="wd:ACCOUNT"/>
        <xsl:text>|</xsl:text>
        <xsl:value-of select="normalize-space(substring(replace(wd:MEMO,'~',''),1,400))"/>
        <xsl:text>|</xsl:text>
        <xsl:value-of select="wd:AMT"/>
        <xsl:text>|</xsl:text>
        <xsl:value-of select="wd:CURRENCY"/>
        <xsl:call-template name="insertNewLine"/>
    </xsl:template>

    <!-- Emits a single line feed as the row terminator. -->
    <xsl:template name="insertNewLine">
        <xsl:text>&#10;</xsl:text>
    </xsl:template>

</xsl:stylesheet>

I'm not sure what code to add to remove the duplicates considering all element values. I already looked into similar questions but they are mostly based on 1 element. Position is not important as long as the output will only have unique values.

My desired output is:

XYZ|09-01-2018|ZXCVBN|2962015|ABC|85.73|USD
XYZ|09-01-2018|ZXCVBN|2703270|DEF|-85.73|USD
XYZ|09-05-2018|QWERTY|2703270|ABC|-2000|USD
XYZ|09-05-2018|QWERTY|2751015|DEF|2000|USD

Solution

  • One option would be to use xsl:for-each-group and use string-join on the elements you wish to form the grouping key

    <!-- The key lists every field, including wd:AMT, so that lines which
         differ only in amount are not merged into one group. -->
    <xsl:for-each-group select="wd:Report_Entry/wd:LINE" 
                        group-by="string-join((../wd:BU, ../wd:JDATE, ../wd:DESCR, wd:ACCOUNT, wd:MEMO, wd:AMT, wd:CURRENCY), '|')">
    

    Try this XSLT

    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:wd="urn:com.report/RPT"
    xmlns:this="urn:this-stylesheet"
    version="2.0">
    
    <!-- Writes one pipe-delimited text row per distinct wd:LINE, where
         "distinct" is judged on the three parent entry fields plus all four
         line fields. -->
    <xsl:output method="text"/>
    
    <xsl:template match="wd:Report_Data">
        <!-- The grouping key covers every output field, including wd:AMT, so
             lines that differ only in amount stay separate. The key uses the
             raw DESCR and MEMO text, before '~' removal and whitespace
             normalisation. Inside the group body the context item is the
             first line of the group. -->
        <xsl:for-each-group select="wd:Report_Entry/wd:LINE" 
                            group-by="string-join((../wd:BU, ../wd:JDATE, ../wd:DESCR, wd:ACCOUNT, wd:MEMO, wd:AMT, wd:CURRENCY), '|')">
            <xsl:value-of select="../wd:BU"/>
            <xsl:text>|</xsl:text>
            <xsl:value-of select="../wd:JDATE"/>
            <xsl:text>|</xsl:text>
            <xsl:value-of select="normalize-space(substring(replace(../wd:DESCR,'~',''),1,400))"/>
            <xsl:text>|</xsl:text>
            <xsl:value-of select="wd:ACCOUNT"/>
            <xsl:text>|</xsl:text>
            <xsl:value-of select="normalize-space(substring(replace(wd:MEMO,'~',''),1,400))"/>
            <xsl:text>|</xsl:text>
            <xsl:value-of select="wd:AMT"/>
            <xsl:text>|</xsl:text>
            <xsl:value-of select="wd:CURRENCY"/>
            <xsl:call-template name="insertNewLine"/>
        </xsl:for-each-group>
    </xsl:template>
    
    <!-- Emits a single line feed as the row terminator. -->
    <xsl:template name="insertNewLine">
        <xsl:text>&#10;</xsl:text>
    </xsl:template>
    
    </xsl:stylesheet>
    

    In fact, you could shorten the template to this, at the expense of readability:

    <!-- Compact variant: the grouping key is itself the output row, so it is
         built in output column order (BU, JDATE, DESCR, ACCOUNT, MEMO, AMT,
         CURRENCY) and includes wd:AMT. -->
    <xsl:template match="wd:Report_Data">
      <xsl:for-each-group select="wd:Report_Entry/wd:LINE" 
                          group-by="string-join((../wd:BU,
                                                 ../wd:JDATE,
                                                 normalize-space(substring(replace(../wd:DESCR,'~',''),1,400)),
                                                 wd:ACCOUNT,
                                                 normalize-space(substring(replace(wd:MEMO,'~',''),1,400)),
                                                 wd:AMT,
                                                 wd:CURRENCY), '|')">
          <xsl:value-of select="current-grouping-key()"/>
          <xsl:call-template name="insertNewLine"/>
      </xsl:for-each-group>
    </xsl:template>
    

    Or, if you really wanted to, do this...

    <!-- Single-expression variant: builds one row string per wd:LINE in output
         column order (including wd:AMT), removes duplicate rows with
         distinct-values(), and writes the rest separated by line feeds. -->
    <xsl:template match="wd:Report_Data">
      <xsl:value-of select="distinct-values(wd:Report_Entry/wd:LINE/string-join((../wd:BU,
                                ../wd:JDATE,
                                normalize-space(substring(replace(../wd:DESCR,'~',''),1,400)),
                                wd:ACCOUNT,
                                normalize-space(substring(replace(wd:MEMO,'~',''),1,400)),
                                wd:AMT,
                                wd:CURRENCY), '|'))" separator="&#10;" />
    </xsl:template>