Search code examples
xmlxsltdistinct-valuesxslt-groupingworkday-api

XSLT Removing duplicates from group


An employee can be a manager, an HR partner, or both. They can be a manager in any number of cost centers. They can be an HR partner of any number of cost centers.

If I pull the combinations I need out of every record, I receive duplicates. Employee A and Employee B report to same manager in same cost center, so it returns two lines. One for Employee A and one for Employee B. Because I don't pull their employeeIDs (only manager's and hr partner's), they appear as duplicate lines. I am currently using '[not(.=preceding::' to eliminate duplicates, but checks against the entire file and only allows a manager to belong to one cost center rather than multiple. How can I perform the check ONLY on the group (cost center) instead of the whole file (all cost centers)?

I've tried two composite keys (managerID,cost center ID) and (hr partner, cost center id), but struggled with it until it got to messy and wasn't working. I unfortunately saved this code over it. So if that's really my only option, I'll have to rewrite it and troubleshoot from there.

Input XML Sample:

<wd:Report_Data>
    <wd:Report_Entry>
        <wd:Sub_Employee_ID>123345678</wd:Sub_Employee_ID>
        <wd:Cost_Center_ID>20000003</wd:Cost_Center_ID>
        <wd:HR_Partner>
            <wd:ID wd:type="Employee_ID">81025060</wd:ID>
        </wd:HR_Partner>
        <wd:HR_Partner>
            <wd:ID wd:type="Employee_ID">81452340</wd:ID>
        </wd:HR_Partner>
        <wd:Manager>
            <wd:Employee_ID>81055097</wd:Employee_ID>
        </wd:Manager>
        <wd:Role_ID>2005</wd:Role_ID>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:Sub_Employee_ID>22345123</wd:Sub_Employee_ID>
        <wd:Cost_Center_ID>20000003</wd:Cost_Center_ID>
        <wd:HR_Partner>
            <wd:ID wd:type="Employee_ID">81025060</wd:ID>
        </wd:HR_Partner>
        <wd:Manager>
            <wd:Employee_ID>81055097</wd:Employee_ID>
        </wd:Manager>
        <wd:Role_ID>2005</wd:Role_ID>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:Sub_Employee_ID>33424567</wd:Sub_Employee_ID>
        <wd:Cost_Center_ID>20000003</wd:Cost_Center_ID>
        <wd:HR_Partner>
            <wd:ID wd:type="Employee_ID">81022549</wd:ID>
        </wd:HR_Partner>
        <wd:Manager>
            <wd:Employee_ID>12312312</wd:Employee_ID>
        </wd:Manager>
        <wd:Role_ID>2005</wd:Role_ID>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:Sub_Employee_ID>72390123</wd:Sub_Employee_ID>
        <wd:Cost_Center_ID>40000006</wd:Cost_Center_ID>
        <wd:HR_Partner>
            <wd:ID wd:type="Employee_ID">81025060</wd:ID>
        </wd:HR_Partner>
        <wd:Manager>
            <wd:Employee_ID>81055097</wd:Employee_ID>
        </wd:Manager>
        <wd:Role_ID>2005</wd:Role_ID>
    </wd:Report_Entry>
</wd:Report_Data>

Desired output:

81055097;
20000003;
2005;
Manager

81025060;
20000003;
2005;
HR Partner

81452340
20000003;
2005;
HR Partner

12312312
20000003;
2005;
Manager

81022549;
20000003;
2005;
HR Partner

81055097;
40000006;
2005;
Manager

81025060;
40000006;
2005;
HR_Partner

Current xslt:

<xsl:template match="wd:Report_Data/wd:Report_Entry">
    <xsl:for-each-group select="." group-by="wd:Cost_Center_ID">

        <!-- Manager Line -->
        <!-- Filter Duplicates -->
        <xsl:sort select="wd:Manager/wd:Employee_ID"/>
        <xsl:if test="wd:Manager/wd:Employee_ID[not(.=preceding::wd:Manager/wd:Employee_ID)]">
            <xsl:value-of select="wd:Manager/wd:Employee_ID"/>;
            <xsl:value-of select="wd:Cost_Center_ID"/>;
            <xsl:value-of select="wd:Role_ID"/>;
            <xsl:value-of>Manager</xsl:value-of>
            <xsl:value-of select="$linefeed"/>
        </xsl:if>

        <!-- HR Partner Line -->
        <!-- Filter Duplicates -->
        <xsl:for-each select="../wd:HR_Partner/wd:ID[@wd:type='Employee_ID']">
            <xsl:if test=".[not(.=preceding::wd:HR_Partner/wd:ID[@wd:type='Employee_ID'])]">
                <xsl:value-of select="wd:ID[@wd:type='Employee_ID']"/>;
                <xsl:value-of select="../wd:Cost_Center_ID"/>;
                <xsl:value-of select="../wd:Role_ID"/>;
                <xsl:value-of>HR Partner</xsl:value-of>
                <xsl:value-of select="$linefeed"/>
            </xsl:if>
        </xsl:for-each> 
    </xsl:for-each-group>
</xsl:template>

Solution

  • Try the solution given below:

    <?xml version="1.0" encoding="UTF-8" ?>
    <xsl:transform version="2.0"
      xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
      xmlns:wd="http://dummy.com"
      xpath-default-namespace="http://dummy.com"
      exclude-result-prefixes="#all">
      <xsl:output method="text" encoding="UTF-8" indent="yes" />
      <xsl:strip-space elements="*"/>
    
      <xsl:key name="Partners" match="//HR_Partner"
        use="concat(../Cost_Center_ID, '/', ID)"/>
      <xsl:key name="Managers" match="//Manager"
        use="concat(../Cost_Center_ID, '/', Employee_ID)"/>
      <xsl:variable name="lf" select="'&#xA;'"/>
    
      <xsl:template match="Report_Data">
        <xsl:for-each-group select="Report_Entry" group-by="Cost_Center_ID">
          <xsl:for-each select="current-group()">
            <xsl:sort select="Manager/Employee_ID"/>
            <xsl:for-each select="Manager">
              <xsl:variable name="kk" select="concat(../Cost_Center_ID, '/', Employee_ID)"/>
              <xsl:if test="generate-id()=generate-id(key('Managers', $kk)[1])">
                <xsl:value-of select="Employee_ID"/>
                <xsl:text>; </xsl:text>
                <xsl:value-of select="../Cost_Center_ID"/>
                <xsl:text>; </xsl:text>
                <xsl:value-of select="../Role_ID"/>
                <xsl:text>; Manager;</xsl:text>
                <xsl:value-of select="$lf"/>
              </xsl:if>
            </xsl:for-each>
            <xsl:for-each select="HR_Partner">
              <xsl:variable name="kk" select="concat(../Cost_Center_ID, '/', ID)"/>
              <xsl:if test="generate-id()=generate-id(key('Partners', $kk)[1])">
                <xsl:value-of select="ID"/>
                <xsl:text>; </xsl:text>
                <xsl:value-of select="../Cost_Center_ID"/>
                <xsl:text>; </xsl:text>
                <xsl:value-of select="../Role_ID"/>
                <xsl:text>; HR Partner;</xsl:text>
                <xsl:value-of select="$lf"/>
              </xsl:if>
            </xsl:for-each>
          </xsl:for-each>
        </xsl:for-each-group>
      </xsl:template>
    </xsl:transform>
    

    I tested it using online XSLT verifier, with input XML as below:

    <?xml version="1.0" encoding="UTF-8"?>
    <wd:main xmlns:wd="http://dummy.com">
      <wd:Report_Data>
      <!-- Here your Report_Entry records -->
      </wd:Report_Data>
    </wd:main>
    

    Note that the sort order is diffrent from your desired output, due to the sorting applied.