I have a xml having different entries for different days for the same employee as below:
<!--
  Sample report: each wd:Report_Entry carries one staffID and one absence date.
  The namespace URI below is a placeholder; replace it with the URI that the
  real report declares for the wd prefix.
-->
<wd:Report_Data xmlns:wd="http://example.com/wd">
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>111</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-21-07:00</wd:date>
  </wd:Report_Entry>
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>111</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-22-07:00</wd:date>
  </wd:Report_Entry>
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>222</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-23-07:00</wd:date>
  </wd:Report_Entry>
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>333</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-15-07:00</wd:date>
  </wd:Report_Entry>
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>333</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-16-07:00</wd:date>
  </wd:Report_Entry>
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>333</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-29-07:00</wd:date>
  </wd:Report_Entry>
</wd:Report_Data>
I want an output which groups the consecutive days with a start date and an end date as below:
<!-- Desired result: one worker element per run of consecutive dates of a staffID. -->
<wd:Report_Entry>
  <worker>
    <staffID>111</staffID>
    <start_date>2020-08-21-07:00</start_date>
    <end_date>2020-08-22-07:00</end_date>
  </worker>
  <worker>
    <staffID>222</staffID>
    <start_date>2020-08-23-07:00</start_date>
    <end_date>2020-08-23-07:00</end_date>
  </worker>
  <worker>
    <staffID>333</staffID>
    <start_date>2020-08-15-07:00</start_date>
    <end_date>2020-08-16-07:00</end_date>
  </worker>
  <worker>
    <staffID>333</staffID>
    <start_date>2020-08-29-07:00</start_date>
    <end_date>2020-08-29-07:00</end_date>
  </worker>
</wd:Report_Entry>
I have tried using group-starting-with but it is grouping irrespective of staffID.
<!--
  Attempted solution: wraps all results in one wd:Report_Entry and emits a
  worker element (staffID, start_date, end_date) for each group of entries.
-->
<xsl:template match="/wd:Report_Data">
<wd:Report_Entry>
<!--
  A new group starts at every entry whose date is not exactly one day after
  the date of its immediately preceding sibling. The pattern compares dates
  only and never looks at staffID, so entries of different staff members with
  consecutive dates end up in the same group.
-->
<xsl:for-each-group select="wd:Report_Entry" group-starting-with="*[not(xs:date(wd:date) = xs:date(preceding-sibling::*[1]/wd:date) + xs:dayTimeDuration('P1D'))] ">
<worker>
<!-- The context item is the first entry of the group, so staffID and
     start_date are taken from that first entry. -->
<staffID>
<xsl:value-of select="wd:workerGroup/wd:staffID"/>
</staffID>
<start_date>
<xsl:value-of select="wd:date"/>
</start_date>
<!-- The end date is the date of the last entry in the current group. -->
<end_date>
<xsl:value-of select="current-group()[last()]/wd:date"/>
</end_date>
</worker>
</xsl:for-each-group>
<!-- </xsl:for-each-group> -->
</wd:Report_Entry>
</xsl:template>
Getting output as below which is WRONG:
<!-- Result of the attempt: the range of staffID 111 wrongly extends to 2020-08-23. -->
<wd:Report_Entry>
  <worker>
    <staffID>111</staffID>
    <start_date>2020-08-21-07:00</start_date>
    <end_date>2020-08-23-07:00</end_date>
  </worker>
</wd:Report_Entry>
It seems sufficient to use group-by
on the staff id first and then sort the group on the date to then group by adjacent dates minus position() * 1 day:
<?xml version="1.0" encoding="utf-8"?>
<!--
  Emits one worker element (staffID, start_date, end_date) per run of
  consecutive dates of the same staffID.

  Assumes the input elements are in the namespace http://example.com/wd and
  that a staff member has at most one entry per date: a repeated date would
  split a run, because the adjacency key depends on position().
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                version="3.0"
                xpath-default-namespace="http://example.com/wd"
                exclude-result-prefixes="#all"
                expand-text="yes">

  <xsl:output method="xml" indent="yes"/>

  <xsl:template match="Report_Data">
    <root>
      <!-- Step 1: collect all entries of one staff member. -->
      <xsl:for-each-group select="Report_Entry" group-by="workerGroup/staffID">
        <!--
          Step 2: order that member's entries chronologically. The sort key is
          cast to xs:date so the order is by date value rather than by the
          string form of the element content.
          Step 3: date minus position() days is constant within a run of
          consecutive dates, so group-adjacent on it yields one group per run.
        -->
        <xsl:for-each-group select="sort(current-group(), (), function($e) { xs:date($e/date) })"
                            group-adjacent="xs:date(date) - position() * xs:dayTimeDuration('P1D')">
          <worker>
            <staffID>
              <xsl:value-of select="workerGroup/staffID"/>
            </staffID>
            <!-- The context item is the first (earliest) entry of the run. -->
            <start_date>
              <xsl:value-of select="date[1]"/>
            </start_date>
            <!-- The last item of the sorted run carries the latest date. -->
            <end_date>
              <xsl:value-of select="current-group()[last()]/date"/>
            </end_date>
          </worker>
        </xsl:for-each-group>
      </xsl:for-each-group>
    </root>
  </xsl:template>

</xsl:stylesheet>
For an XSLT 2 processor you might need to implement the sorting in a user-defined function based on xsl:perform-sort
:
<!--
  XSLT 2.0 variant: emits one worker element (staffID, start_date, end_date)
  per run of consecutive dates of the same staffID.

  Assumes the input elements are in the namespace http://example.com/wd and
  that a staff member has at most one entry per date: a repeated date would
  split a run, because the adjacency key depends on position().
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:mf="http://example.com/mf"
                version="2.0"
                xpath-default-namespace="http://example.com/wd"
                exclude-result-prefixes="#all">

  <!--
    Returns the given Report_Entry elements ordered by their date child.
    The sort key is cast to xs:date so the order is by date value rather
    than by the string form of the element content.
  -->
  <xsl:function name="mf:sort-by-date" as="element(Report_Entry)*">
    <xsl:param name="entries" as="element(Report_Entry)*"/>
    <xsl:perform-sort select="$entries">
      <xsl:sort select="xs:date(date)"/>
    </xsl:perform-sort>
  </xsl:function>

  <xsl:output method="xml" indent="yes"/>

  <xsl:template match="Report_Data">
    <root>
      <!-- Step 1: collect all entries of one staff member. -->
      <xsl:for-each-group select="Report_Entry" group-by="workerGroup/staffID">
        <!--
          Step 2: date minus position() days is constant within a run of
          consecutive dates in the sorted sequence, so group-adjacent on it
          yields one group per run.
        -->
        <xsl:for-each-group select="mf:sort-by-date(current-group())"
                            group-adjacent="xs:date(date) - position() * xs:dayTimeDuration('P1D')">
          <worker>
            <staffID>
              <xsl:value-of select="workerGroup/staffID"/>
            </staffID>
            <!-- The context item is the first (earliest) entry of the run. -->
            <start_date>
              <xsl:value-of select="date[1]"/>
            </start_date>
            <!-- The last item of the sorted run carries the latest date. -->
            <end_date>
              <xsl:value-of select="current-group()[last()]/date"/>
            </end_date>
          </worker>
        </xsl:for-each-group>
      </xsl:for-each-group>
    </root>
  </xsl:template>

</xsl:stylesheet>
Of course the sorting is only necessary if the input is not sorted by staff id and date, otherwise the presented grouping should suffice.