Search code examples
nosqlxquery

group and add nodes in memory XML


I have below input:

<income>
    <flow>
        <Year>2020-2021</Year>
        <Period>8</Period>
        <Date>2020-11-30</Date>
        <Metric>in</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>8</Period>
        <Date>2020-11-30</Date>
        <Metric>out</Metric>
        <Monthly_Value>4</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>9</Period>
        <Date>2020-12-31</Date>
        <Metric>in</Metric>
        <Monthly_Value>3</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>9</Period>
        <Date>2020-12-31</Date>
        <Metric>out</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>10</Period>
        <Date>2021-01-31</Date>
        <Metric>in</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
       <Period>10</Period>
        <Date>2021-01-31</Date>
        <Metric>out</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>11</Period>
        <Date>2021-02-28</Date>
        <Metric>in</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>11</Period>
        <Date>2021-02-28</Date>
        <Metric>out</Metric>
        <Monthly_Value>3</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>12</Period>
        <Date>2021-03-31</Date>
        <Metric>in</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>12</Period>
        <Date>2021-03-31</Date>
        <Metric>out</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>1</Period>
        <Date>2021-04-30</Date>
        <Metric>in</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>1</Period>
        <Date>2021-04-30</Date>
        <Metric>out</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>2</Period>
        <Date>2021-05-31</Date>
        <Metric>in</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>2</Period>
        <Date>2021-05-31</Date>
        <Metric>out</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>4</Period>
        <Date>2021-07-31</Date>
        <Metric>out</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>5</Period>
        <Date>2021-08-31</Date>
        <Metric>in</Metric>
        <Monthly_Value>3</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>5</Period>
        <Date>2021-08-31</Date>
        <Metric>out</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>6</Period>
        <Date>2021-09-30</Date>
        <Metric>in</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>6</Period>
        <Date>2021-09-30</Date>
        <Metric>out</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>7</Period>
        <Date>2021-10-31</Date>
        <Metric>in</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>7</Period>
        <Date>2021-10-31</Date>
        <Metric>out</Metric>
        <Monthly_Value>5</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>8</Period>
        <Date>2021-11-30</Date>
        <Metric>in</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>8</Period>
        <Date>2021-11-30</Date>
        <Metric>out</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
</income>

I need to group Metric & Year, sort ascending on Period first. This process generates 4 groups: 

  1. in & 2020-2021 
  2. in & 2021-2022
  3. out & 2020-2021
  4. out & 2021-2022

Then I get the previous Monthly_Value, then add new node after each Monthly_Value node as Previous_Value.

Expected output:

<income>
   <flows>
      <Year>2020-2021</Year>
      <flow>
         <Year>2020-2021</Year>
         <Period>8</Period>
         <Date>2020-11-30</Date>
         <Metric>in</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2020-2021</Year>
         <Period>9</Period>
         <Date>2020-12-31</Date>
         <Metric>in</Metric>
         <Monthly_Value>3</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2020-2021</Year>
         <Period>10</Period>
         <Date>2021-01-31</Date>
         <Metric>in</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>3</Previous_Value>
      </flow>
      <flow>
         <Year>2020-2021</Year>
         <Period>11</Period>
         <Date>2021-02-28</Date>
         <Metric>in</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
      <flow>
         <Year>2020-2021</Year>
         <Period>12</Period>
         <Date>2021-03-31</Date>
         <Metric>in</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
   </flows>
   <flows>
      <Year>2021-2022</Year>
      <flow>
         <Year>2021-2022</Year>
         <Period>1</Period>
         <Date>2021-04-30</Date>
         <Metric>in</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>2</Period>
         <Date>2021-05-31</Date>
         <Metric>in</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>3</Period>
         <Date>2021-06-30</Date>
         <Metric>in</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
 <flow>
         <Year>2021-2022</Year>
         <Period>4</Period>
         <Date>2021-07-31</Date>
         <Metric>in</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>5</Period>
         <Date>2021-08-31</Date>
         <Metric>in</Metric>
         <Monthly_Value>3</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>6</Period>
         <Date>2021-09-30</Date>
         <Metric>in</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>3</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>7</Period>
         <Date>2021-10-31</Date>
         <Metric>in</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>8</Period>
         <Date>2021-11-30</Date>
         <Metric>in</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
   </flows>
   <flows>
      <Year>2020-2021</Year>
      <flow>
         <Year>2020-2021</Year>
         <Period>8</Period>
         <Date>2020-11-30</Date>
         <Metric>out</Metric>
         <Monthly_Value>4</Monthly_Value>
         <Previous_Value>4</Previous_Value>
      </flow>
      <flow>
         <Year>2020-2021</Year>
         <Period>9</Period>
         <Date>2020-12-31</Date>
         <Metric>out</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>4</Previous_Value>
      </flow>
      <flow>
         <Year>2020-2021</Year>
         <Period>10</Period>
         <Date>2021-01-31</Date>
         <Metric>out</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
 <flow>
         <Year>2020-2021</Year>
         <Period>11</Period>
         <Date>2021-02-28</Date>
         <Metric>out</Metric>
         <Monthly_Value>3</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2020-2021</Year>
         <Period>12</Period>
         <Date>2021-03-31</Date>
         <Metric>out</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>3</Previous_Value>
      </flow>
   </flows>
   <flows>
      <Year>2021-2022</Year>
      <flow>
         <Year>2021-2022</Year>
         <Period>1</Period>
         <Date>2021-04-30</Date>
         <Metric>out</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>2</Period>
         <Date>2021-05-31</Date>
         <Metric>out</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>3</Period>
         <Date>2021-06-30</Date>
         <Metric>out</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>4</Period>
         <Date>2021-07-31</Date>
         <Metric>out</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>5</Period>
         <Date>2021-08-31</Date>
         <Metric>out</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>6</Period>
         <Date>2021-09-30</Date>
         <Metric>out</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>7</Period>
         <Date>2021-10-31</Date>
         <Metric>out</Metric>
         <Monthly_Value>5</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>8</Period>
         <Date>2021-11-30</Date>
         <Metric>out</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>5</Previous_Value>
      </flow>
   </flows>
</income>

My Xquery:

for $metric in  distinct-values($nodes//Metric) 
for $y in  distinct-values($nodes/flow[Metric eq $metric]/Year)
for $p in distinct-values($nodes/flow[Metric eq $metric and Year eq $y]/Period)
order by $p ascending
return 
  <income>{
  for $node in $nodes
  let $mv := $node/preceding-sibling::*
  return 
    <Previous_Value>{$mv}</Previous_Value> 
  }  
  </income>

I get a sequence of 26 blank items:

<income><Previous_Value/></income>

How can I produce the correct output?


Solution

  • For your grouped items, you can't use XPath to look for preceding-sibling:: from the source XML. If you let a variable, such as the $sorted-flows below, of the flow elements ordered by Period, then you can obtain the Monthly_Value from the previous item in that sorted sequence.

    Since the first item won't have a previous, it seemed that you wanted to use it's value as the Previous_Value. The logic below creates a sequence with the previous (if it exists), then the current value and uses head() to obtain the first item from that sequence.

      <income>{
        for $metric in  distinct-values($nodes//Metric) 
        for $y in  distinct-values($nodes/flow[Metric eq $metric]/Year)
        let $sorted-flows := 
          for $p in distinct-values($nodes/flow[Metric eq $metric and Year eq $y]/Period)
          return 
            for $flow in $nodes/flow
            where $flow[Metric eq $metric and Year eq $y and Period eq $p]
            order by $flow/Period ascending
            return $flow
        return
          <flows>{
            element {"year"} {$y},
            for $item at $i in $sorted-flows
            return
              <flow>{
                $item/*,
                <Previous_Value>{
                  (: either the previous Monthly_Value or the current for the first:)
                  ($sorted-flows[$i - 1]/self::flow/Monthly_Value/data(), $item/Monthly_Value/data()) => head()
                }
                </Previous_Value>
              }
              </flow> 
           }
           </flows>
       }
       </income> 
    

    You could also apply an XSLT and take advantage of xsl:for-each-group as an alternative solution:

    let $xslt := 
    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0">
      <xsl:output indent="yes"/>
        
      <xsl:template match="income">
        <xsl:copy>
          <flows>
            <xsl:for-each-group select="flow" group-by="string-join((Year,Metric), '|')" >
              <xsl:sort select="Metric" order="descending"/>
              <flows>
                <Year><xsl:value-of select="Year"/></Year>
                <xsl:variable name="sorted-flows" as="element(flow)*">
                  <xsl:perform-sort select="current-group()">
                    <xsl:sort select="Period/number()" order="ascending"/>
                  </xsl:perform-sort>
                </xsl:variable>
                <xsl:for-each select="$sorted-flows">
                  <xsl:variable name="current-position" select="position()"/>
                  <xsl:copy>
                    <xsl:copy-of select="*"/>
                      <Previous_Value>
                        <xsl:value-of select="(($sorted-flows[$current-position - 1], .))[1]/Monthly_Value"/>
                      </Previous_Value>
                  </xsl:copy>
                </xsl:for-each>
              </flows>
            </xsl:for-each-group>
          </flows>
        </xsl:copy>
      </xsl:template> 
    </xsl:stylesheet>
    return 
      xdmp:xslt-eval($xslt, $nodes)