Search code examples
xmlxslt

XSLT Aggregating data and outputting it in CSV


I'm parsing an XML file to a new XML file with only the data I need in a structure closer to the end result I need. From this second XML, I run a second XSLT to get the CSV file I need. My first XSLT gives me a proper XML, no problem there, but I can reformat it if needed. I just need help for the second XSLT, I'm getting lost using the 'key'. I'm using the Xalan processor.

Here is an excerpt of the second XML I need to parse (there are usually more 'Part's):

<?xml version="1.0" encoding="UTF-8"?>
<Project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="xfatcom2.xsd" generated="2023-12-20T10:00:15.173" version="2.0">
    <Project_number>21534</Project_number>
    <Batch>
        <Batch_name>FE20A00</Batch_name>
        <Part>
            <Part_name>CEA70064DRHBK</Part_name>
            <Part_bar_qty>2</Part_bar_qty>
            <Part_perc_scrap>0,26484</Part_perc_scrap>
        </Part>
        <Part>
            <Part_name>CEB24037</Part_name>
            <Part_bar_qty>1</Part_bar_qty>
            <Part_perc_scrap>0,94724</Part_perc_scrap>
        </Part>
    </Batch>
    <Batch>
        <Batch_name>FE20A00</Batch_name>
        <Part>
            <Part_name>CEA70064DRHBK</Part_name>
            <Part_bar_qty>2</Part_bar_qty>
            <Part_perc_scrap>0,215</Part_perc_scrap>
        </Part>
        <Part>
            <Part_name>CEB24037</Part_name>
            <Part_bar_qty>3</Part_bar_qty>
            <Part_perc_scrap>0,35514</Part_perc_scrap>
        </Part>
    </Batch>
    <Batch>
        <Batch_name>FE30A00</Batch_name>
        <Part>
            <Part_name>CEA70064DRHBK</Part_name>
            <Part_bar_qty>2</Part_bar_qty>
            <Part_perc_scrap>0,11211</Part_perc_scrap>
        </Part>
        <Part>
            <Part_name>CEB24037</Part_name>
            <Part_bar_qty>12</Part_bar_qty>
            <Part_perc_scrap>0,17375</Part_perc_scrap>
        </Part>
    </Batch>
</Project>

Here is what I currently have in my XSL:

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:exsl="http://exslt.org/common">
    <xsl:output method="text" encoding="UTF-8" omit-xml-declaration="yes" indent="no" />
    <xsl:include href="PontEpicorCommons.xsl"/>
    <xsl:key name="PartKey" match="Part" use="Part_name"/>
    <xsl:template match="Project">
        <!-- Header -->
        <xsl:text>Project.ProjectID;ProjPhase.PhaseID;UD_PartNum_C;UD_WitnessQty_c;UD_OptimisedPerc_c</xsl:text>
        <xsl:variable name="Project_number" select="Project_number"/><!-- Project number -->
        <xsl:for-each select="Batch">
            <xsl:variable name="Batch_name" select="Batch_name"/>
             <xsl:for-each select="Part">
             
                <!-- testing building the output -->
                <xsl:variable name="XYZ" select="."/>
                <xsl:variable name="Part_Key" select="key('PartKey', $XYZ)"/>
                <xsl:variable name="" select=""/>
                <xsl:variable name="" select=""/>
                <xsl:variable name="" select=""/>
                <xsl:variable name="" select=""/>
                
                <xsl:text>&#13;&#10;</xsl:text>
                <xsl:variable name="Part_name" select="Part_name"/><!-- part name must not be duplicate, use 'key' -->
                <xsl:variable name="Part_bar_qty" select="Part_bar_qty"/>
                <xsl:variable name="Part_perc_scrap" select="Part_perc_scrap"/>
                <!-- A - Project.ProjectID -->
                <xsl:value-of select="$Project_number"/>
                <xsl:text>;</xsl:text>
                <!-- B - ProjPhase.PhaseID -->
                <xsl:value-of select="$Batch_name"/>
                <xsl:text>;</xsl:text>
                <!-- C - UD_PartNum_c -->
                <xsl:value-of select="$Part_name"/>
                <xsl:text>;</xsl:text>
                <!-- D - UD_WitnessQty_c -->
                <xsl:value-of select="$Part_bar_qty"/><!-- need total of all same shipping batch -->
                <xsl:text>;</xsl:text>
                <!-- E - UD_OptimisedPerc_c -->
                <xsl:value-of select="$Part_perc_scrap"/><!-- need weighted average (by bar qty) of all same shipping batch -->
            </xsl:for-each>
        </xsl:for-each>
    </xsl:template>
</xsl:stylesheet>

And here is the CSV output I would like from this:

Project.ProjectID;ProjPhase.PhaseID;UD_PartNum_C;UD_WitnessQty_c;UD_OptimisedPerc_c
21534;FE20A00;CEA70064DRHBK;4;0,23992
21534;FE20A00;CEB24037;4;0,503165
21534;FE30A00;CEA70064DRHBK;2;0,11211
21534;FE30A00;CEB24037;12;0,17375

Output format explanation:

1st line is just the column names
The rest is:
1st column <Project_number>
2nd column <Batch_name>
3rd column <Part_name>
4th column <Part_bar_qty> ("total from all the Part having the same Part_name and belonging to the same batch (by Batch_name)
5th column <Part_perc_scrap> (weighted average by <Part_bar_qty> for all <Batch> having the same <Batch_name>)

Solution

  • Here is something that does almost everything you asked for, except for the last column.

    XSLT 1.0

    <xsl:stylesheet version="1.0" 
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output method="text" encoding="UTF-8"/>
    
    <xsl:key name="part" match="Part" use="concat(Part_name, '|', ../Batch_name)"/>
    
    <xsl:template match="/Project">
        <!-- header -->
        <xsl:text>Project.ProjectID;ProjPhase.PhaseID;UD_PartNum_C;UD_WitnessQty_c;UD_OptimisedPerc_c&#10;</xsl:text>
        <!-- data -->
        <xsl:variable name="Project_number" select="Project_number"/>
        <xsl:for-each select="Batch/Part[count(. | key('part', concat(Part_name, '|', ../Batch_name))[1]) = 1]">
            <!-- 1. Project -->
            <xsl:value-of select="$Project_number"/>
            <xsl:text>;</xsl:text>
            <!-- 2. Batch -->
            <xsl:value-of select="../Batch_name"/>
            <xsl:text>;</xsl:text>
            <!-- 3. Part -->
            <xsl:value-of select="Part_name"/>
            <xsl:text>;</xsl:text>
            <!-- 4. group -->
            <xsl:variable name="group" select="key('part', concat(Part_name, '|', ../Batch_name))"/>
            <!-- 4.a group quantity -->
            <xsl:value-of select="sum($group/Part_bar_qty)"/>
            <!-- 4.b average -->
            <!-- ??? -->
    
            <xsl:text>&#10;</xsl:text>
        </xsl:for-each>
    </xsl:template>
    
    </xsl:stylesheet>
    

    Regarding the last column:

    1. You say you want a weighted average by Part_bar_qty - but you don't say an average of what.
    2. Computing a weighted average in XSLT 1.0 is not a trivial task; I suggest you ask about this in a separate question.
    3. An additional complication in your case is that the provided Part_perc_scrap values use a comma as the decimal separator; you will need to translate this to a decimal point before you can use the value as a number in a numerical operation. And you will need to format the result, if you want it to use a decimal comma too.