Search code examples
xmlxslt

XSLT Aggregation using sort with Muenchian Method for min and max


I understand the Muenchian Method indexes the entire document at first read. But how can one sort prior or within grouping but not on its key? Specifically, how can I order a sibling of the key in order to calculate min and max of a series of element values?

As the XML data below shows, I am attempting to aggregate companies by industry. Across the six metrics (revenue, assets, equity, netincome, stockprice, and employees) I can achieve various aggregates: sum with sum() and average with sum() div count(). But in order to obtain min and max, particularly for netincome, I need to sort companies within each industry by netincome and select the value at a given position(). Note that there are exactly five companies per industry, so for a descending netincome sort, position()=1 is the maximum and position()=5 is the minimum.

Now, I can achieve my desired results by running two XSLT scripts: the first sorts and the second aggregates. But how can I do so with one XSLT? Within the for-each loop over the Muenchian key, I tried each of the following, all to no avail.

<xsl:sort select="../netincome" data-type="number" order="descending"/>

<xsl:sort select="key('indkey', .)/../netincome" data-type="number" order="descending"/>

<xsl:sort select="key('indkey', .)[../netincome]" data-type="number" order="descending"/>

Possibly the min and max must be computed in a separate template, or the XSLT must run in two passes (e.g. via call-template), or parameters must be passed using <xsl:with-param>.

XML data

<?xml version="1.0" encoding="UTF-8"?>
<data>
    <bigcompany>
        <company>Company OA</company>
        <industry>Oil &amp; Gas</industry>
        <revenue>394105000000</revenue>
        <assets>349493000000</assets>
        <equity>174399000000</equity>
        <netincome>32520000000</netincome>
        <stockprice>89.38</stockprice>
        <employees>75300</employees>
    </bigcompany>
    <bigcompany>
        <company>Company OB</company>
        <industry>Oil &amp; Gas</industry>
        <revenue>200494000000</revenue>
        <assets>266026000000</assets>
        <equity>156191000000</equity>
        <netincome>19241000000</netincome>
        <stockprice>108.62</stockprice>
        <employees>64700</employees>
    </bigcompany>
    <bigcompany>
        <company>Company OC</company>
        <industry>Oil &amp; Gas</industry>
        <revenue>13807000000</revenue>
        <assets>4726000000</assets>
        <equity>16445000000</equity>
        <netincome>2720000000</netincome>
        <stockprice>48.5</stockprice>
        <employees>22000</employees>
    </bigcompany>
    <bigcompany>
        <company>Company OD</company>
        <industry>Oil &amp; Gas</industry>
        <revenue>97800000000</revenue>
        <assets>30500000000</assets>
        <equity>10800000000</equity>
        <netincome>2700000000</netincome>
        <stockprice>27.53</stockprice>
        <employees>45340</employees>
    </bigcompany>
    <bigcompany>
        <company>Company OE</company>
        <industry>Oil &amp; Gas</industry>
        <revenue>62004000000</revenue>
        <assets>117144000000</assets>
        <equity>48427000000</equity>
        <netincome>8428000000</netincome>
        <stockprice>66.66</stockprice>
        <employees>16900</employees>
    </bigcompany>
    <bigcompany>
        <company>Company PA</company>
        <industry>Pharmaceuticals</industry>
        <revenue>49605000000</revenue>
        <assets>169274000000</assets>
        <equity>71622000000</equity>
        <netincome>9135000000</netincome>
        <stockprice>30.14</stockprice>
        <employees>78000</employees>
    </bigcompany>
    <bigcompany>
        <company>Company PB</company>
        <industry>Pharmaceuticals</industry>
        <revenue>48047000000</revenue>
        <assets>105128000000</assets>
        <equity>56943000000</equity>
        <netincome>6272000000</netincome>
        <stockprice>55.43</stockprice>
        <employees>76000</employees>
    </bigcompany>
    <bigcompany>
        <company>Company PC</company>
        <industry>Pharmaceuticals</industry>
        <revenue>74331000000</revenue>
        <assets>131119000000</assets>
        <equity>69752000000</equity>
        <netincome>16323000000</netincome>
        <stockprice>102.31</stockprice>
        <employees>126500</employees>
    </bigcompany>
    <bigcompany>
        <company>Company PD</company>
        <industry>Pharmaceuticals</industry>
        <revenue>23113000000</revenue>
        <assets>35249000000</assets>
        <equity>17641000000</equity>
        <netincome>4685000000</netincome>
        <stockprice>67.2</stockprice>
        <employees>37925</employees>
    </bigcompany>
    <bigcompany>
        <company>Company PE</company>
        <industry>Pharmaceuticals</industry>
        <revenue>15879000000</revenue>
        <assets>33749000000</assets>
        <equity>14852000000</equity>
        <netincome>2004000000</netincome>
        <stockprice>58</stockprice>
        <employees>28000</employees>
    </bigcompany>
    <bigcompany>
        <company>Company MA</company>
        <industry>Media</industry>
        <revenue>48813000000</revenue>
        <assets>84186000000</assets>
        <equity>44958000000</equity>
        <netincome>8004000000</netincome>
        <stockprice>93.65</stockprice>
        <employees>180000</employees>
    </bigcompany>
    <bigcompany>
        <company>Company MB</company>
        <industry>Media</industry>
        <revenue>64657000000</revenue>
        <assets>158813000000</assets>
        <equity>51058000000</equity>
        <netincome>7135000000</netincome>
        <stockprice>57.05</stockprice>
        <employees>139000</employees>
    </bigcompany>
    <bigcompany>
        <company>Company MC</company>
        <industry>Media</industry>
        <revenue>31867000000</revenue>
        <assets>54793000000</assets>
        <equity>17418000000</equity>
        <netincome>4514000000</netincome>
        <stockprice>36.52</stockprice>
        <employees>27000</employees>
    </bigcompany>
    <bigcompany>
        <company>TCompany MD</company>
        <industry>Media</industry>
        <revenue>29795000000</revenue>
        <assets>67994000000</assets>
        <equity>29904000000</equity>
        <netincome>3691000000</netincome>
        <stockprice>84.3</stockprice>
        <employees>26000</employees>
    </bigcompany>
    <bigcompany>
        <company>Company ME</company>
        <industry>Media</industry>
        <revenue>15284000000</revenue>
        <assets>26387000000</assets>
        <equity>9966000000</equity>
        <netincome>1879000000</netincome>
        <stockprice>54.88</stockprice>
        <employees>20915</employees>
    </bigcompany>
</data>

XSLT 1

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sorting pass: copies the input unchanged except that the children of <data>
  are reordered by industry (text, ascending) and then by netincome
  (numeric, descending).
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
  <xsl:output indent="yes" omit-xml-declaration="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Identity rule: copy every node and attribute as-is. -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Override for <data>: emit its child nodes in sorted order. -->
  <xsl:template match="data">
    <xsl:copy>
      <xsl:apply-templates select="node()">
        <!-- Primary key: industry name. -->
        <xsl:sort select="industry" data-type="text" order="ascending"/>
        <!-- Secondary key: largest net income first within each industry. -->
        <xsl:sort select="netincome" data-type="number" order="descending"/>
      </xsl:apply-templates>
    </xsl:copy>
  </xsl:template>

</xsl:stylesheet>

XSLT 2

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Aggregates <bigcompany> records by <industry> using Muenchian grouping and
  emits one <aggdata> element per distinct industry, ordered by industry name.

  Max/min of netincome are computed with order-independent XPath 1.0
  predicates, so the input does NOT have to be pre-sorted and a group may hold
  any number of companies (not only five).
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output omit-xml-declaration="yes" indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Index every <industry> element by its own text value. -->
  <xsl:key name="indkey" match="bigcompany/industry" use="."/>

  <xsl:template match="data">
    <data>
      <!-- Visit only the first <industry> of each distinct value. -->
      <xsl:for-each select="bigcompany/industry[generate-id() = generate-id(key('indkey', .)[1])]">
        <xsl:sort select="." order="ascending"/>

        <!-- All <bigcompany> elements belonging to the current industry. -->
        <xsl:variable name="group" select="key('indkey', .)/.."/>
        <xsl:variable name="incomes" select="$group/netincome"/>

        <aggdata>
          <xsl:copy-of select="."/>
          <SumOfRevenue><xsl:value-of select="sum($group/revenue)"/></SumOfRevenue>
          <AvgOfAssets><xsl:value-of select="sum($group/assets) div count($group/assets)"/></AvgOfAssets>
          <AvgOfEquity><xsl:value-of select="sum($group/equity) div count($group/equity)"/></AvgOfEquity>
          <!-- Maximum: the value that is not less than any value in the group.
               On ties several nodes qualify; value-of outputs the first one. -->
          <MaxOfIncome><xsl:value-of select="$incomes[not(. &lt; $incomes)]"/></MaxOfIncome>
          <!-- Minimum: the value that is not greater than any value in the group. -->
          <MinOfIncome><xsl:value-of select="$incomes[not(. &gt; $incomes)]"/></MinOfIncome>
          <AvgOfStockPrice><xsl:value-of select="sum($group/stockprice) div count($group/stockprice)"/></AvgOfStockPrice>
          <SumOfEmployees><xsl:value-of select="sum($group/employees)"/></SumOfEmployees>
        </aggdata>

      </xsl:for-each>
    </data>
  </xsl:template>
</xsl:stylesheet>

Final and Desired results - but how with one XSLT?

<?xml version='1.0' encoding='UTF-8'?>
<data>
  <aggdata>
    <industry>Media</industry>
    <SumOfRevenue>1.90416e+011</SumOfRevenue>
    <AvgOfAssets>7.84346e+010</AvgOfAssets>
    <AvgOfEquity>3.06608e+010</AvgOfEquity>
    <MaxOfIncome>8004000000</MaxOfIncome>
    <MinOfIncome>1879000000</MinOfIncome>
    <AvgOfStockPrice>65.28</AvgOfStockPrice>
    <SumOfEmployees>392915</SumOfEmployees>
  </aggdata>
  <aggdata>
    <industry>Oil &amp; Gas</industry>
    <SumOfRevenue>7.6821e+011</SumOfRevenue>
    <AvgOfAssets>1.535778e+011</AvgOfAssets>
    <AvgOfEquity>8.12524e+010</AvgOfEquity>
    <MaxOfIncome>32520000000</MaxOfIncome>
    <MinOfIncome>2700000000</MinOfIncome>
    <AvgOfStockPrice>68.138</AvgOfStockPrice>
    <SumOfEmployees>224240</SumOfEmployees>
  </aggdata>
  <aggdata>
    <industry>Pharmaceuticals</industry>
    <SumOfRevenue>2.10975e+011</SumOfRevenue>
    <AvgOfAssets>9.49038e+010</AvgOfAssets>
    <AvgOfEquity>4.6162e+010</AvgOfEquity>
    <MaxOfIncome>16323000000</MaxOfIncome>
    <MinOfIncome>2004000000</MinOfIncome>
    <AvgOfStockPrice>62.616</AvgOfStockPrice>
    <SumOfEmployees>346425</SumOfEmployees>
  </aggdata>
</data>

Solution

  • How about this way?

    XSLT 1.0

    <xsl:stylesheet version="1.0"
        xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xmlns:exsl="http://exslt.org/common"
        extension-element-prefixes="exsl">
    <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
    <xsl:strip-space elements="*"/>

    <!-- Index each company by the text of its industry child. -->
    <xsl:key name="company-by-industry" match="bigcompany" use="industry"/>

    <xsl:template match="/data">
        <xsl:copy>
            <!-- Muenchian grouping: process the first company of every industry, by industry name. -->
            <xsl:for-each select="bigcompany[generate-id() = generate-id(key('company-by-industry', industry)[1])]">
                <xsl:sort select="industry" data-type="text" order="ascending"/>

                <!-- Every company sharing the current industry. -->
                <xsl:variable name="members" select="key('company-by-industry', industry)"/>

                <!-- Copies of the group's netincome elements, smallest first (a result tree fragment). -->
                <xsl:variable name="sorted-incomes-rtf">
                    <xsl:for-each select="$members">
                        <xsl:sort select="netincome" data-type="number" order="ascending"/>
                        <xsl:copy-of select="netincome"/>
                    </xsl:for-each>
                </xsl:variable>

                <!-- Turn the fragment into a node-set so it can be indexed by position. -->
                <xsl:variable name="sorted-incomes" select="exsl:node-set($sorted-incomes-rtf)/netincome"/>

                <aggdata>
                    <xsl:copy-of select="industry"/>
                    <SumOfRevenue><xsl:value-of select="sum($members/revenue)"/></SumOfRevenue>
                    <AvgOfAssets><xsl:value-of select="sum($members/assets) div count($members/assets)"/></AvgOfAssets>
                    <AvgOfEquity><xsl:value-of select="sum($members/equity) div count($members/equity)"/></AvgOfEquity>
                    <!-- Ascending order: the last entry is the maximum, the first the minimum. -->
                    <MaxOfIncome><xsl:value-of select="$sorted-incomes[last()]"/></MaxOfIncome>
                    <MinOfIncome><xsl:value-of select="$sorted-incomes[1]"/></MinOfIncome>
                    <AvgOfStockPrice><xsl:value-of select="sum($members/stockprice) div count($members/stockprice)"/></AvgOfStockPrice>
                    <SumOfEmployees><xsl:value-of select="sum($members/employees)"/></SumOfEmployees>
                </aggdata>
            </xsl:for-each>
        </xsl:copy>
    </xsl:template>

    </xsl:stylesheet>