Search code examples
xslttransposemuenchian-grouping

Transpose XML using Muenchian method - multiple groupings


I need to transpose an XML file such as this:

<?xml version="1.0" encoding="UTF-8"?>
<products>
    <product id="10" name="Widget 1" price="15.99" category_code="T" category="Toys" manufacturer_code="SC1" manufacturer="Some Company 1" />
    <product id="10" name="Widget 1" price="15.99" category_code="E" category="Electronics" manufacturer_code="SC1" manufacturer="Some Company 1" />
    <product id="10" name="Widget 1" price="15.99" category_code="T" category="Toys" manufacturer_code="SC2" manufacturer="Some Company 2" />
    <product id="10" name="Widget 1" price="15.99" category_code="E" category="Electronics" manufacturer_code="SC2" manufacturer="Some Company 2" />
    <product id="10" name="Widget 1" price="15.99" category_code="T" category="Toys" manufacturer_code="SC3" manufacturer="Some Company 3" />
    <product id="10" name="Widget 1" price="15.99" category_code="E" category="Electronics" manufacturer_code="SC3" manufacturer="Some Company 3" />
    <product id="11" name="Widget 2" price="21.99" category_code="V" category="Video Games" manufacturer_code="SC4" manufacturer="Some Company 4" />
    <product id="12" name="Widget 3" price="10.99" category_code="T" category="Toys" manufacturer_code="SC1" manufacturer="Some Company 2" />
</products>

to a comma delimited text file, or a properly formated HTML table that includes only one line for each product, something like this:

id,name,price,category_code_1,category_1,category_code_2,category_2,manufacturer_code_1,manufacturer_1,manufacturer_code_2,manufacturer_2,manufacturer_code_3,manufacturer_3
    10, Widget 1, 15.99, T, Toys, E, Electronics, SC1, Some Company 1, SC2, Some Company 2, SC3, Some Company 3 
    11, Widget 2, 21.99, V, Video Games,,, SC4, Some Company 4,,,,
    12, Widget 3, 10.99, T, Toys,,, SC1, Some Company 2,,,,

As you'd notice, the XML data can be thought of as the result of three tables joined together: product, product_category, and product_manufacturer. Each product can belong to multiple categories and have multiple manufacturers. Of course the real data that I'm dealing with is more complex and in a totally different domain, but this sample depicts the problem properly.

I have very limited knowledge of XSLT, and with help of SO and other resources on the Interent, have put together a stylesheet that partly provides me what I need:

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output method="html" version="1.0" encoding="UTF-8" indent="yes"/>
    <xsl:key name="key_product_group" match="product" use="@id"/>
    <xsl:key name="key_category_group" match="product" use="concat(
             @id,
             @category_code,
             @category)"/>
    <xsl:key name="key_manufacturer_group" match="product" use="concat(
             @id,
             @manufacturer_code,
             @manufacturer)"/>
    <xsl:variable name="var_max_category_group"  >
        <xsl:for-each select="//product[generate-id(.) = generate-id(key('key_product_group',@id) )]">
            <xsl:sort select="count(key('key_product_group',@id )[generate-id() = generate-id(key('key_category_group',
                                   concat(@id,
                                  @category_code,
                                  @category)))])" order="descending"/>
            <xsl:if test="position() = 1">
                <xsl:value-of select="count(key('key_product_group',@id )[generate-id() = generate-id(key('key_category_group',
                                   concat(@id,
                                  @category_code,
                                  @category)))])"/>
            </xsl:if>
        </xsl:for-each>
    </xsl:variable>
    <xsl:variable name="var_max_manufacturer_group">
        <xsl:for-each select="//product[generate-id(.) = generate-id(key('key_product_group',@id) )]">
            <xsl:sort select="count(key('key_product_group',@id )[generate-id() = generate-id(key('key_manufacturer_group',
                                   concat(@id,
                                  @category_code,
                                  @category)))])" order="descending"/>
            <xsl:if test="position() = 1">
                <xsl:value-of   select="count(key('key_product_group',@id )[generate-id() = generate-id(key('key_manufacturer_group',
                                   concat(@id,
                                  @manufacturer_code,
                                  @manufacturer)))])"/>
            </xsl:if>
        </xsl:for-each>
    </xsl:variable>
    <xsl:template match="/">
        <xsl:text>id,</xsl:text>
        <xsl:text>name,</xsl:text>
        <xsl:text>price,</xsl:text>
        <xsl:call-template name="loop_pcat">
            <xsl:with-param name="count" select="$var_max_category_group"/>
        </xsl:call-template>
        <xsl:call-template name="loop_pmf">
            <xsl:with-param name="count" select="$var_max_manufacturer_group"/>
        </xsl:call-template>
        <br></br>
        <xsl:variable name="var_group"
                          select="//product[generate-id(.) = generate-id(key('key_product_group',@id)[1])]"/>
        <xsl:for-each select="$var_group">
            <xsl:sort order="ascending" select="@id"/>
            <xsl:value-of select="@id"/>,
            <xsl:value-of select="@name"/>,
            <xsl:value-of select="@price"/>,
            <xsl:for-each select="key('key_product_group',@id )[generate-id() = generate-id(key('key_category_group',
                                   concat(@id,
                                  @category_code,
                                  @category)))]">
                <xsl:value-of select="@category_code"/>,
                <xsl:value-of select="@category"/>,
            </xsl:for-each>
            <xsl:for-each select="key('key_product_group',@id )[generate-id() = generate-id(key('key_manufacturer_group',
                                   concat(@id,
                                  @manufacturer_code,
                                  @manufacturer)))]">
                <xsl:value-of select="@manufacturer_code"/>,
                <xsl:value-of select="@manufacturer"/>,
            </xsl:for-each>
            <br></br>
        </xsl:for-each>
    </xsl:template>
    <xsl:template name="loop_pcat">
        <xsl:param name="count" select="1"/>
        <xsl:param name="limit" select="$count+1"/>
        <xsl:if test="$count > 0">
            <xsl:value-of select="concat('category_code_',$limit - $count)"/>
            <xsl:text>,</xsl:text>
            <xsl:value-of select="concat('category_',$limit - $count)"/>
            <xsl:text>,</xsl:text>
            <xsl:call-template name="loop_pcat">
                <xsl:with-param name="count" select="$count - 1"/>
                <xsl:with-param name="limit" select="$limit"/>
            </xsl:call-template>
        </xsl:if>
    </xsl:template>
    <xsl:template name="loop_pmf">
        <xsl:param name="count" select="1"/>
        <xsl:param name="limit" select="$count+1"/>
        <xsl:if test="$count > 0">
            <xsl:value-of select="concat('manufacturer_code_',$limit - $count)"/>
            <xsl:text>,</xsl:text>
            <xsl:value-of select="concat('manufacturer_',$limit - $count)"/>
            <xsl:text>,</xsl:text>
            <xsl:call-template name="loop_pmf">
                <xsl:with-param name="count" select="$count - 1"/>
                <xsl:with-param name="limit" select="$limit"/>
            </xsl:call-template>
        </xsl:if>
    </xsl:template>
</xsl:stylesheet>

The above stylesheet produces the following results:

id,name,price,category_code_1,category_1,category_code_2,category_2,manufacturer_code_1,manufacturer_1,manufacturer_code_2,manufacturer_2,manufacturer_code_3,manufacturer_3,
10, Widget 1, 15.99, T, Toys, E, Electronics, SC1, Some Company 1, SC2, Some Company 2, SC3, Some Company 3, 
11, Widget 2, 21.99, V, Video Games, SC4, Some Company 4, 
12, Widget 3, 10.99, T, Toys, SC1, Some Company 2, 

The output has at least one major problem: all columns do not exist in each line, for example line 2 and 3 are missing category_code_2, and category_2, and manufacture_code and manufacturer 2 and 3. I'm sure there are other problems with the stylesheet as well, and I have no idea how it would perform on a relatively large xml file, but for now I would greatly appreciate your help with making the stylesheet produce the desired output format.

Thank you

MRSA


Solution

  • I had already begun with my own implementation, but then you posted and I was able to reuse some of your code to speed things up a bit. Main differences are:

    • in the templates, delimiters are added in front instead of after data; that way, you can end your lines properly.
    • the keys only use the ids and the codes (not the names).
    • the concatenation for the keys also uses a delimiter ('#').
    • the main template only provides the header line.
    • the product template has simpler calls to the data templates.
    • modified the data templates accordingly…

    Here is my resulting stylesheet and its output:

    <?xml version="1.0" encoding="UTF-8"?>
    <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
        <xsl:output method="text" version="1.0" encoding="UTF-8"/>
        <xsl:key name="byProduct" match="product" use="@id"/> 
        <xsl:key name="byProductAndCategory" match="product" use="concat(@id,'#',@category_code)"/> 
        <xsl:key name="byProductAndManufacturer" match="product" use="concat(@id,'#',@manufacturer_code)"/>  
        <xsl:variable name="delimiter" select="','"/>
        <xsl:variable name="eol" select="'&#xa;'"/>
        <xsl:variable name="maxManufacturers">
            <xsl:for-each select="//product[count(.|key('byProduct',@id)[1])=1]">
                <xsl:sort select="count(key('byProductAndCategory',concat(@id,'#',@category_code)))"/>
                <xsl:if test="position()=last()">
                    <xsl:value-of select="count(key('byProductAndCategory',concat(@id,'#',@category_code)))"/>
                </xsl:if>
            </xsl:for-each>
        </xsl:variable>
        <xsl:variable name="maxCategories">
            <xsl:for-each select="//product[count(.|key('byProduct',@id)[1])=1]">
                <xsl:sort select="count(key('byProductAndManufacturer',concat(@id,'#',@manufacturer_code)))"/>
                <xsl:if test="position()=last()">
                    <xsl:value-of select="count(key('byProductAndManufacturer',concat(@id,'#',@manufacturer_code)))"/>
                </xsl:if>
            </xsl:for-each>
        </xsl:variable>
        <xsl:template match="/">
            <!--create the file header-->
            <xsl:text>id</xsl:text>
            <xsl:value-of select="$delimiter"/>
            <xsl:text>name</xsl:text>
            <xsl:value-of select="$delimiter"/>
            <xsl:text>price</xsl:text>
            <!--recursive loop for each group to add appropriate number of columns, i.e. maximum number of repetitions-->
            <xsl:call-template name="loop_pcat_header">
                <xsl:with-param name="count" select="$maxCategories"/>
            </xsl:call-template>
            <!--recursive loop for each group to add appropriate number of columns, i.e. maximum number of repetitions-->
            <xsl:call-template name="loop_pmf_header">
                <xsl:with-param name="count" select="$maxManufacturers"/>
            </xsl:call-template>
            <xsl:value-of select="$eol"/>
            <xsl:apply-templates select="//product[count(.|key('byProduct',@id)[1])=1]"/>
        </xsl:template>
        <xsl:template match="product">
            <!-- id -->
            <xsl:value-of select="@id"/>
            <xsl:value-of select="$delimiter"/>
            <!-- name -->
            <xsl:value-of select="@name"/>
            <xsl:value-of select="$delimiter"/>
            <!-- price -->
            <xsl:value-of select="@price"/>
            <!-- category stuff -->
            <xsl:call-template name="loop_pcat_data">
                <xsl:with-param name="data" select="key('byProductAndManufacturer',concat(@id,'#',@manufacturer_code))"/>
                <xsl:with-param name="count" select="$maxCategories"/>
            </xsl:call-template>
            <!-- manufacturer stuff -->
            <xsl:call-template name="loop_pmf_data">
                <xsl:with-param name="data" select="key('byProductAndCategory',concat(@id,'#',@category_code))"/>
                <xsl:with-param name="count" select="$maxManufacturers"/>
            </xsl:call-template>
            <xsl:value-of select="$eol"/>
        </xsl:template>
        <!--recursive loop templates for file header and data-->
        <xsl:template name="loop_pcat_header">
            <xsl:param name="count" select="1"/>
            <xsl:param name="limit" select="$count+1"/>
            <xsl:if test="$count &gt; 0">
                <xsl:value-of select="$delimiter"/>
                <xsl:value-of select="concat('category_code_',$limit - $count)"/>
                <xsl:value-of select="$delimiter"/>
                <xsl:value-of select="concat('category_',$limit - $count)"/>
                <xsl:call-template name="loop_pcat_header">
                    <xsl:with-param name="count" select="$count - 1"/>
                    <xsl:with-param name="limit" select="$limit"/>
                </xsl:call-template>
            </xsl:if>
        </xsl:template>
        <xsl:template name="loop_pmf_header">
            <xsl:param name="count" select="1"/>
            <xsl:param name="limit" select="$count+1"/>
            <xsl:if test="$count &gt; 0">
                <xsl:value-of select="$delimiter"/>
                <xsl:value-of select="concat('manufacturer_code_',$limit - $count)"/>
                <xsl:value-of select="$delimiter"/>
                <xsl:value-of select="concat('manufacturer_',$limit - $count)"/>
                <xsl:call-template name="loop_pmf_header">
                    <xsl:with-param name="count" select="$count - 1"/>
                    <xsl:with-param name="limit" select="$limit"/>
                </xsl:call-template>
            </xsl:if>
        </xsl:template>
        <xsl:template name="loop_pcat_data">
            <xsl:param name="data"/>
            <xsl:param name="count" select="1"/>
            <xsl:param name="limit" select="$count+1"/>
            <xsl:if test="$count &gt; 0">
                <xsl:value-of select="$delimiter"/>
                <xsl:value-of select="$data[$limit - $count]/@category_code"/>
                <xsl:value-of select="$delimiter"/>
                <xsl:value-of select="$data[$limit - $count]/@category"/>
                <xsl:call-template name="loop_pcat_data">
                    <xsl:with-param name="data" select="$data"/>
                    <xsl:with-param name="count" select="$count - 1"/>
                    <xsl:with-param name="limit" select="$limit"/>
                </xsl:call-template>
            </xsl:if>
        </xsl:template>
        <xsl:template name="loop_pmf_data">
            <xsl:param name="data"/>
            <xsl:param name="count" select="1"/>
            <xsl:param name="limit" select="$count+1"/>
            <xsl:if test="$count &gt; 0">
                <xsl:value-of select="$delimiter"/>
                <xsl:value-of select="$data[$limit - $count]/@manufacturer_code"/>
                <xsl:value-of select="$delimiter"/>
                <xsl:value-of select="$data[$limit - $count]/@manufacturer"/>
                <xsl:call-template name="loop_pmf_data">
                    <xsl:with-param name="data" select="$data"/>
                    <xsl:with-param name="count" select="$count - 1"/>
                    <xsl:with-param name="limit" select="$limit"/>
                </xsl:call-template>
            </xsl:if>
        </xsl:template>
    </xsl:stylesheet>
    

    The requested output:

    id,name,price,category_code_1,category_1,category_code_2,category_2,manufacturer_code_1,manufacturer_1,manufacturer_code_2,manufacturer_2,manufacturer_code_3,manufacturer_3
    10,Widget 1,15.99,T,Toys,E,Electronics,SC1,Some Company 1,SC2,Some Company 2,SC3,Some Company 3
    11,Widget 2,21.99,V,Video Games,,,SC4,Some Company 4,,,,
    12,Widget 3,10.99,T,Toys,,,SC1,Some Company 2,,,,