Search code examples
xml · xslt · grouping · xslt-2.0 · xslt-grouping

Grouping of consecutive days for different employees


I have an XML document that contains separate entries for different days for the same employee, as below:

<wd:Report_Data xmlns:wd="http://example.com/wd">
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>111</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-21-07:00</wd:date>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>111</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-22-07:00</wd:date>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>222</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-23-07:00</wd:date>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>333</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-15-07:00</wd:date>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>333</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-16-07:00</wd:date>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>333</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-29-07:00</wd:date>
    </wd:Report_Entry>
</wd:Report_Data>

I want an output which groups the consecutive days with a start date and an end date as below:

<wd:Report_Entry>
   <worker>
      <staffID>111</staffID>
      <start_date>2020-08-21-07:00</start_date>
      <end_date>2020-08-22-07:00</end_date>
   </worker>
   <worker>
      <staffID>222</staffID>
      <start_date>2020-08-23-07:00</start_date>
      <end_date>2020-08-23-07:00</end_date>
   </worker>
   <worker>
      <staffID>333</staffID>
      <start_date>2020-08-15-07:00</start_date>
      <end_date>2020-08-16-07:00</end_date>
   </worker>
   <worker>
      <staffID>333</staffID>
      <start_date>2020-08-29-07:00</start_date>
      <end_date>2020-08-29-07:00</end_date>
   </worker>
</wd:Report_Entry>

I have tried using group-starting-with but it is grouping irrespective of staffID.

<!--
  Attempt from the question: emits one <worker> per group of wd:Report_Entry
  elements, where a new group starts whenever an entry's date is NOT exactly
  one day after the date of its immediately preceding sibling.
  NOTE(review): the start condition looks only at wd:date. wd:workerGroup/wd:staffID
  is never part of the test, so consecutive dates that belong to different
  staff members are merged into the same group.
  NOTE(review): the wd: and xs: prefixes are assumed to be declared on the
  enclosing xsl:stylesheet, which is not shown here.
-->
<xsl:template match="/wd:Report_Data">
  
    <wd:Report_Entry>
              <!-- The predicate compares this entry's date with the previous sibling's date plus one day; the first entry has no previous sibling. -->
              <xsl:for-each-group select="wd:Report_Entry" group-starting-with="*[not(xs:date(wd:date) = xs:date(preceding-sibling::*[1]/wd:date) + xs:dayTimeDuration('P1D'))] "> 
            <worker>
                <!-- Relative paths below are evaluated against the first entry of the current group. -->
                <staffID>
                    <xsl:value-of select="wd:workerGroup/wd:staffID"/>
                </staffID>
                <start_date>
                    <xsl:value-of select="wd:date"/>
                </start_date>
                <!-- The end date is taken from the last entry of the current group. -->
                <end_date>
                    <xsl:value-of select="current-group()[last()]/wd:date"/>
                </end_date>
            </worker>
            </xsl:for-each-group>
        <!--   </xsl:for-each-group> --> 
    </wd:Report_Entry>
   
</xsl:template>

Getting output as below which is WRONG:

<wd:Report_Entry>
   <worker>
      <staffID>111</staffID>
      <start_date>2020-08-21-07:00</start_date>
      <end_date>2020-08-23-07:00</end_date>
   </worker>
</wd:Report_Entry>

Solution

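  • It seems sufficient to use group-by on the staff id first, then sort each group by date, and finally use group-adjacent with the key "date minus position() * 1 day". Within a run of consecutive days the date and the position both increase by one per entry, so this key stays constant for the whole run and changes as soon as there is a gap: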

    <?xml version="1.0" encoding="utf-8"?>
    <!--
        Collapses the Report_Entry elements of each staff id into one <worker>
        element per run of consecutive calendar days, reporting the first and
        the last date of the run.

        Input : Report_Data/Report_Entry elements in the namespace
                http://example.com/wd, each with workerGroup/staffID and date.
        Output: <root> containing <worker> elements (no namespace) with
                staffID, start_date and end_date children.

        Uses fn:sort with a key function, so an XPath 3.1 capable XSLT 3.0
        processor is required.
    -->
    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        version="3.0"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        xpath-default-namespace="http://example.com/wd"
        exclude-result-prefixes="#all"
        expand-text="yes">
        
        <xsl:output method="xml" indent="yes"/>
        
        <xsl:template match="Report_Data">
            <root>
                <!-- Outer grouping: one group per staff id, regardless of document order. -->
                <xsl:for-each-group select="Report_Entry" group-by="workerGroup/staffID">
                    <!--
                        Inner grouping: sort the staff member's entries chronologically, then
                        group adjacent entries by "date minus position() days". That key is
                        constant across a run of consecutive days and changes at every gap.
                        The sort key is cast to xs:date so entries are ordered as dates (the
                        same type the grouping key uses) rather than as untyped strings.
                    -->
                    <xsl:for-each-group select="sort(current-group(), (), function($e) { xs:date($e/date) })" group-adjacent="xs:date(date) - position() * xs:dayTimeDuration('P1D')">
                        <worker>
                            <staffID>
                                <xsl:value-of select="workerGroup/staffID"/>
                            </staffID>
                            <!-- The context item is the first (earliest) entry of the run. -->
                            <start_date>
                                <xsl:value-of select="date[1]"/>
                            </start_date>
                            <!-- The last entry of the run carries the latest date. -->
                            <end_date>
                                <xsl:value-of select="current-group()[last()]/date"/>
                            </end_date>
                        </worker>
                    </xsl:for-each-group>
                </xsl:for-each-group>
            </root>
        </xsl:template>
        
    </xsl:stylesheet>
    

    The sort() function used above is an XPath 3.1 function, so it is not available in an XSLT 2 processor; there you can implement the sorting in a user-defined function based on xsl:perform-sort:

    <!--
        XSLT 2.0 variant: collapses the Report_Entry elements of each staff id
        into one <worker> element per run of consecutive calendar days,
        reporting the first and the last date of the run.

        Input : Report_Data/Report_Entry elements in the namespace
                http://example.com/wd, each with workerGroup/staffID and date.
        Output: <root> containing <worker> elements (no namespace) with
                staffID, start_date and end_date children.

        Declared as version 2.0 and free of XSLT 3.0-only attributes, so it
        runs on an XSLT 2.0 processor without forwards-compatible processing.
    -->
    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        version="2.0"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        xmlns:mf="http://example.com/mf"
        xpath-default-namespace="http://example.com/wd"
        exclude-result-prefixes="#all">
        
        <!--
            mf:sort-by-date($entries)
            Returns the given Report_Entry elements in ascending order of their
            date child. The sort key is cast to xs:date so the entries are
            ordered as dates rather than as untyped text.
        -->
        <xsl:function name="mf:sort-by-date" as="element(Report_Entry)*">
            <xsl:param name="entries" as="element(Report_Entry)*"/>
            <xsl:perform-sort select="$entries">
                <xsl:sort select="xs:date(date)"/>
            </xsl:perform-sort>
        </xsl:function>
        
        <xsl:output method="xml" indent="yes"/>
        
        <xsl:template match="Report_Data">
            <root>
                <!-- Outer grouping: one group per staff id, regardless of document order. -->
                <xsl:for-each-group select="Report_Entry" group-by="workerGroup/staffID">
                    <!--
                        Inner grouping: walk the staff member's entries in date order and
                        group adjacent entries by "date minus position() days". That key is
                        constant across a run of consecutive days and changes at every gap.
                    -->
                    <xsl:for-each-group select="mf:sort-by-date(current-group())" group-adjacent="xs:date(date) - position() * xs:dayTimeDuration('P1D')">
                        <worker>
                            <staffID>
                                <xsl:value-of select="workerGroup/staffID"/>
                            </staffID>
                            <!-- The context item is the first (earliest) entry of the run. -->
                            <start_date>
                                <xsl:value-of select="date[1]"/>
                            </start_date>
                            <!-- The last entry of the run carries the latest date. -->
                            <end_date>
                                <xsl:value-of select="current-group()[last()]/date"/>
                            </end_date>
                        </worker>
                    </xsl:for-each-group>
                </xsl:for-each-group>
            </root>
        </xsl:template>
        
    </xsl:stylesheet>
    

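    Of course, the sorting is only necessary if the entries of a given staff id are not already in ascending date order in the input (the outer group-by already separates the staff ids). If they are, the inner xsl:for-each-group can select current-group() directly and the presented grouping suffices.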