Search code examples
xml xslt xslt-3.0

Pull data from an element outside of a loop using a common field while streaming


I raised a similar question previously, but my XML structure has changed and I now need to utilise streaming, and I haven't been able to get this working with my current solution.

I'm looping through XML, but some of the fields I need to reference sit outside of my loop and depend on an ID from the current loop. There is a common ID field that I can use to match up the elements, but I need them to match within the context of the outer loop and not the entire document. I have been trying to use a key but haven't been able to get it working while streaming. As a test I removed the streaming references; I then somewhat get what I need, but the key matches against the entire document rather than my loop, e.g.:

<Date>13/10/2023 15/10/2023 20/10/2023</Date>

XML Structure

<Company><Employee_Stack_Grouped>
<Employee_Stack>
  <Employee>
    <Details>
      <EmployeeID>ABC11111</EmployeeID>
    </Details>
    <Changes sequence="0">
      <ChangeDetails>
        <entryDate>20/10/2023</entryDate>
      </ChangeDetails>
      <Plans>
        <Plan delete="Y">
          <Name>Plan 1</Name>
          <Start>01/01/2023</Start>
          <ID>12345</ID>
        </Plan>
        <Plan updated="Y">
          <Name>Plan 1</Name>
          <Start>13/10/2023</Start>
          <ID>12345</ID>
        </Plan>
        <Plan updated="Y">
          <Name>Plan 2</Name>
          <Start>13/10/2023</Start>
          <ID>67890</ID>
        </Plan>
        <Plan>
          <Name>Plan 3</Name>
          <Start>01/01/2023</Start>
          <ID>11111</ID>
        </Plan>
        <Interest_Plan updated="Y">
          <Name>Plan 4</Name>
          <Start>13/10/2023</Start>
          <ID>22222</ID>
        </Interest_Plan>
      </Plans>
      <Amounts>
        <Earning updated="Y">
          <Amount>1000</Amount>
          <Plan>
            <ID>12345</ID>
          </Plan>
        </Earning>
        <Earning updated="Y">
          <Amount>1000</Amount>
          <Plan>
            <ID>67890</ID>
          </Plan>
        </Earning>
        <Earning>
          <Amount>100</Amount>
          <Plan>
            <ID>11111</ID>
          </Plan>
        </Earning>
        <Earning updated="Y">
          <Amount>5000</Amount>
          <Plan>
            <ID>22222</ID>
          </Plan>
        </Earning>
      </Amounts>
    </Changes>
    <Changes sequence="1">
      <ChangeDetails>
        <entryDate>23/10/2023</entryDate>
      </ChangeDetails>
      <Plans>
        <Plan>
          <Name>Plan 1</Name>
          <Start>13/10/2023</Start>
          <ID>12345</ID>
        </Plan>
        <Plan updated="Y">
          <Name>Plan 2</Name>
          <Start>15/10/2023</Start>
          <ID>67890</ID>
        </Plan>
        <Plan>
          <Name>Plan 3</Name>
          <Start>01/01/2023</Start>
          <ID>11111</ID>
        </Plan>
        <Interest_Plan>
          <Name>Plan 4</Name>
          <Start>13/10/2023</Start>
          <ID>22222</ID>
        </Interest_Plan>
      </Plans>
      <Amounts>
        <Earning>
          <Amount>1000</Amount>
          <Plan>
            <ID>12345</ID>
          </Plan>
        </Earning>
        <Earning updated="Y">
          <Amount>2500</Amount>
          <Plan>
            <ID>67890</ID>
          </Plan>
        </Earning>
        <Earning>
          <Amount>100</Amount>
          <Plan>
            <ID>11111</ID>
          </Plan>
        </Earning>
        <Earning>
          <Amount>5000</Amount>
          <Plan>
            <ID>22222</ID>
          </Plan>
        </Earning>
      </Amounts>
    </Changes>
  </Employee>
  <Employee>
    <Details>
      <EmployeeID>ABC222222</EmployeeID>
    </Details>
    <Changes sequence="0">
      <ChangeDetails>
        <entryDate>23/10/2023</entryDate>
      </ChangeDetails>
      <Plans>
        <Plan updated="Y">
          <Name>Plan 1</Name>
          <Start>20/10/2023</Start>
          <ID>12345</ID>
        </Plan>
        <Plan updated="Y">
          <Name>Plan 2</Name>
          <Start>20/10/2023</Start>
          <ID>67890</ID>
        </Plan>
        <Plan>
          <Name>Plan 3</Name>
          <Start>01/01/2023</Start>
          <ID>11111</ID>
        </Plan>
        <Interest_Plan updated="Y">
          <Name>Plan 4</Name>
          <Start>13/10/2023</Start>
          <ID>22222</ID>
        </Interest_Plan>
      </Plans>
      <Amounts>
        <Earning updated="Y">
          <Amount>1000</Amount>
          <Plan>
            <ID>12345</ID>
          </Plan>
        </Earning>
        <Earning updated="Y">
          <Amount>1000</Amount>
          <Plan>
            <ID>67890</ID>
          </Plan>
        </Earning>
        <Earning>
          <Amount>100</Amount>
          <Plan>
            <ID>11111</ID>
          </Plan>
        </Earning>
        <Earning updated="Y">
          <Amount>5000</Amount>
          <Plan>
            <ID>22222</ID>
          </Plan>
        </Earning>
      </Amounts>
    </Changes>
  </Employee>
</Employee_Stack></Employee_Stack_Grouped></Company>

XSL (uncommenting the key lookup throws the error: "the node supplied must be in a tree whose root is a document node"):

<!--
  Streams the input and, for every Employee, writes one <row> per Earning flagged
  updated='Y', carrying the entry date of the enclosing Changes, the employee ID,
  the amount and the plan ID. The plan start date lookup is currently disabled.
-->
<xsl:stylesheet version="3.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" encoding="UTF-8" indent="yes"/>
<!-- The default mode is streamable; nodes without a matching template are skipped. -->
<xsl:mode streamable="true" on-no-match="shallow-skip"/>

<!--
  Indexes updated Plan / Interest_Plan elements by their ID child.
  A key is scoped to the whole tree that contains the context node, not to a single
  Changes element, so equal IDs in different Changes all answer the same lookup.
-->
<xsl:key name="plan" match="Plans/Plan[@updated = 'Y'] | Interest_Plan[@updated = 'Y']" use="ID"/>

<xsl:template match="/Company">
    <File>
        <!--
          copy-of() materialises each Employee as an in-memory copy so that its
          descendants can be navigated freely inside the loop. The copy is an element
          without a document node above it, which is consistent with key() reporting
          "the node supplied must be in a tree whose root is a document node".
        -->
        <xsl:for-each select="Employee_Stack_Grouped/Employee_Stack/Employee/copy-of()">
            <xsl:variable name="employeeID" select="Details/EmployeeID"/>
            <xsl:for-each select="Changes">
                <xsl:variable name="entryDate" select="ChangeDetails/entryDate"/>

                <!-- Only earnings flagged as updated produce a row. -->
                <xsl:for-each select="Amounts/Earning[@updated = 'Y']">
                    <row>
                        <Entry>
                            <xsl:value-of select="$entryDate"/>
                        </Entry>
                        <Employee_Number>
                            <xsl:value-of select="$employeeID"/>
                        </Employee_Number>
                        <Date>
                            <!-- Disabled lookup: Start of the updated plan whose ID equals this Earning's Plan/ID. -->
                            <!-- <xsl:value-of select="key('plan', Plan/ID)/Start"/> -->
                        </Date>
                        <Amount>
                            <xsl:value-of select="Amount"/>
                        </Amount>
                        <ID>
                            <xsl:value-of select="Plan/ID"/>
                        </ID>
                    </row>
                </xsl:for-each>
            </xsl:for-each>
        </xsl:for-each>
    </File>
</xsl:template></xsl:stylesheet>

Expected Output

<File>
<row>
    <Entry>20/10/2023</Entry>
    <Employee_Number>ABC11111</Employee_Number>
    <Date>13/10/2023</Date>
    <Amount>1000</Amount>
    <ID>12345</ID>
</row>
<row>
    <Entry>20/10/2023</Entry>
    <Employee_Number>ABC11111</Employee_Number>
    <Date>13/10/2023</Date>
    <Amount>1000</Amount>
    <ID>67890</ID>
</row>
<row>
    <Entry>20/10/2023</Entry>
    <Employee_Number>ABC11111</Employee_Number>
    <Date>13/10/2023</Date>
    <Amount>5000</Amount>
    <ID>22222</ID>
</row>
<row>
    <Entry>23/10/2023</Entry>
    <Employee_Number>ABC11111</Employee_Number>
    <Date>15/10/2023</Date>
    <Amount>2500</Amount>
    <ID>67890</ID>
</row>
<row>
    <Entry>23/10/2023</Entry>
    <Employee_Number>ABC222222</Employee_Number>
    <Date>20/10/2023</Date>
    <Amount>1000</Amount>
    <ID>12345</ID>
</row>
<row>
    <Entry>23/10/2023</Entry>
    <Employee_Number>ABC222222</Employee_Number>
    <Date>20/10/2023</Date>
    <Amount>1000</Amount>
    <ID>67890</ID>
</row>
<row>
    <Entry>23/10/2023</Entry>
    <Employee_Number>ABC222222</Employee_Number>
    <Date>13/10/2023</Date>
    <Amount>5000</Amount>
    <ID>22222</ID>
</row></File>

Note there can be multiple Employee_Stack_Grouped, then multiple Employee per Employee_Stack, and finally multiple Changes per Employee. I essentially need to output any Earnings with an updated Flag but reference the date from the Changes/Plan elements.

I'm not sure whether I can achieve this using a key while streaming. As I can't get this working, the other option I am considering is, while looping through the Changes element, to have another loop that stores any updated Plans in a map, and then to reference this map as I loop through the earnings. Any advice on how I can achieve something like this?


Solution

  • Using a capturing accumulator (currently a Saxon extension, probably standardized in XSLT 4) you can use e.g.

    <xsl:stylesheet version="3.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
      xmlns:xs="http://www.w3.org/2001/XMLSchema"
      xmlns:saxon="http://saxon.sf.net/"
      xmlns:map="http://www.w3.org/2005/xpath-functions/map"
      exclude-result-prefixes="#all">
    <xsl:output method="xml" encoding="UTF-8" indent="yes"/>
    <xsl:mode streamable="true" on-no-match="shallow-skip" use-accumulators="plans"/>
    
    <!--
      Accumulator "plans": maps a plan ID (as xs:integer) to a map holding the string
      value of each child element of that plan, keyed by local name (Name, Start, ID
      in the sample input). Only Plan / Interest_Plan elements flagged updated='Y'
      are recorded.
    -->
    <xsl:accumulator name="plans" as="map(xs:integer, map(xs:string,xs:string))" initial-value="map{}" streamable="yes">
      <!--
        Start of every Changes: drop the plans collected so far, so that a lookup made
        for an Earning can only see plans of the same Changes and never a stale entry
        left over from an earlier Changes or Employee.
      -->
      <xsl:accumulator-rule match="Changes" select="map{}"/>
      <!--
        End of an updated plan: saxon:capture="yes" makes the complete element available
        as the context item, so its children can be read to build the entry.
      -->
      <xsl:accumulator-rule phase="end" saxon:capture="yes" match="Plans/Plan[@updated = 'Y'] | Interest_Plan[@updated = 'Y']"
        select="map:put($value, xs:integer(ID), map:merge(*!map:entry(local-name(), string())))"/>
    </xsl:accumulator>
    
    <xsl:template match="/Company">
        <File>
            <xsl:for-each select="Employee_Stack_Grouped/Employee_Stack/Employee/copy-of()">
                <xsl:variable name="employeeID" select="Details/EmployeeID"/>
                <xsl:for-each select="Changes">
                    <xsl:variable name="entryDate" select="ChangeDetails/entryDate"/>
    
                    <xsl:for-each select="Amounts/Earning[@updated = 'Y']">
                        <row>
                            <Entry>
                                <xsl:value-of select="$entryDate"/>
                            </Entry>
                            <Employee_Number>
                                <xsl:value-of select="$employeeID"/>
                            </Employee_Number>
                            <Date>
                                <xsl:value-of select="accumulator-before('plans')(xs:integer(Plan/ID))?Start"/>
                            </Date>
    

    If you don't have the Saxon extension of a "capturing" accumulator you can nevertheless try to store the data in several accumulators and then, once you process an ID, create the map entry:

    <xsl:stylesheet version="3.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
      xmlns:xs="http://www.w3.org/2001/XMLSchema"
      xmlns:saxon="http://saxon.sf.net/"
      xmlns:map="http://www.w3.org/2005/xpath-functions/map"
      exclude-result-prefixes="#all">
    <xsl:output method="xml" encoding="UTF-8" indent="yes"/>
    <xsl:mode streamable="true" on-no-match="shallow-skip" use-accumulators="#all"/>
    
    <!--
      Accumulator "name": Name of the updated plan currently being read, or empty.
      It is cleared at the start of every plan element so that a plan without a Name
      cannot inherit the Name of the plan before it.
    -->
    <xsl:accumulator name="name" as="xs:string?" initial-value="()" streamable="yes">
      <xsl:accumulator-rule match="Plans/Plan | Interest_Plan" select="()"/>
      <xsl:accumulator-rule match="Plans/Plan[@updated = 'Y']/Name/text() | Interest_Plan[@updated = 'Y']/Name/text()" select="string()"/>
    </xsl:accumulator>
    
    <!--
      Accumulator "start": Start of the updated plan currently being read, or empty.
      Cleared at the start of every plan element for the same reason as "name".
    -->
    <xsl:accumulator name="start" as="xs:string?" initial-value="()" streamable="yes">
      <xsl:accumulator-rule match="Plans/Plan | Interest_Plan" select="()"/>
      <xsl:accumulator-rule match="Plans/Plan[@updated = 'Y']/Start/text() | Interest_Plan[@updated = 'Y']/Start/text()" select="string()"/>
    </xsl:accumulator>
    
    
    <!--
      Accumulator "plans": maps a plan ID (as xs:integer) to a map with the entries
      'Start' and 'Name' of that updated plan. An entry is empty when the plan has no
      such child before its ID, hence the xs:string? value type.
      The entry is created when the text of ID is reached, so Name and Start must
      precede ID inside a plan, as they do in the sample input.
    -->
    <xsl:accumulator name="plans" as="map(xs:integer, map(xs:string,xs:string?))" initial-value="map{}" streamable="yes">
      <!--
        Start of every Changes: drop the plans collected so far, so that a lookup made
        for an Earning can only see plans of the same Changes and never a stale entry
        left over from an earlier Changes or Employee.
      -->
      <xsl:accumulator-rule match="Changes" select="map{}"/>
      <xsl:accumulator-rule match="Plans/Plan[@updated = 'Y']/ID/text() | Interest_Plan[@updated = 'Y']/ID/text()"
        select="map:put($value, xs:integer(.), map { 'Start' : accumulator-before('start'), 'Name' : accumulator-before('name') })"/>
    </xsl:accumulator>
    
    <xsl:template match="/Company">
        <File>
            <xsl:for-each select="Employee_Stack_Grouped/Employee_Stack/Employee/copy-of()">
                <xsl:variable name="employeeID" select="Details/EmployeeID"/>
                <xsl:for-each select="Changes">
                    <xsl:variable name="entryDate" select="ChangeDetails/entryDate"/>
    
                    <xsl:for-each select="Amounts/Earning[@updated = 'Y']">
                        <row>
                            <Entry>
                                <xsl:value-of select="$entryDate"/>
                            </Entry>
                            <Employee_Number>
                                <xsl:value-of select="$employeeID"/>
                            </Employee_Number>
                            <Date>
                                <xsl:value-of select="accumulator-before('plans')(xs:integer(Plan/ID))?Start"/>
                            </Date>
    

    You will need to test for yourself whether that approach works way back with Saxon 9.7 EE; as I said, the final XSLT 3 version was produced after that release and only 9.8 and later are supposed to implement it.