Search code examples
sorting xslt xslt-1.0 xslt-grouping

XSLT to show only unique values for glossary index?


This glossary derives the index from the first letter of each entry. I'm trying to work out how to show only the unique values. I have looked into preceding-sibling and position() but cannot seem to find the correct way to do it. I'm constrained to using XSLT 1.0 and attributes.

glossary.xml

<?xml version="1.0" encoding="UTF-8"?>
<!-- Entry document: the xml-stylesheet instruction below associates this file with glossary.xsl. -->
<?xml-stylesheet type="text/xsl" href="glossary.xsl"?>
<!-- Names the glossary data file.
     NOTE(review): glossary.xsl as shown hard-codes document('data.xml') and does not
     read this element; confirm whether the file name is meant to come from here. -->
<include>
    <file name="data.xml"/>
</include>

data.xml

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!-- Glossary data: each entry carries its term and definition as attributes.
     Entries are not in alphabetical order, and terms differ in case (see "Cherry"). -->
<glossary>
    <entry term="cantaloupe" definition="A kind of melon"/>
    <entry term="banana" definition="A tropical yellow fruit"/>
    <entry term="apple" definition="A red fruit with seeds"/>
    <entry term="orange" definition="An orange citrus fruit"/>  
    <entry term="Cherry"  definition="A red fruit that grows in clusters "/>
    <entry term="cranberry" definition="A sour berry enjoyed at Thanksgiving"/>
    <entry term="avocado"  definition="A mellow fruit enjoyed in guacamole"/>
</glossary>

glossary.xsl

<?xml version="1.0" encoding="UTF-8"?>
<!-- Renders the glossary in data.xml as HTML: an index of initials followed by a
     definition list. As written, both loops emit one initial per entry, so a letter
     shared by several terms is repeated (the problem being asked about). -->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output method="html" doctype-system="about:legacy-compat" encoding="UTF-8" indent="yes" />
    <xsl:template match="/">
        <html>
            <head></head>
            <body>
            <!-- Index: one link per entry, labelled with the first character of @term.
                 Open question: how to show each initial only once? -->
                <xsl:for-each select="document('data.xml')/glossary/entry" >
                    <xsl:sort select="@term" data-type="text" order="ascending" case-order="upper-first"/> 
                    <xsl:variable name="initial" select="substring(@term,1,1)" />
                    <a href="#{$initial}"><xsl:value-of select="$initial" /></a> |  
                </xsl:for-each>
            <!-- Glossary: every entry in term order, each preceded by its own heading -->   
                <dl>
                    <xsl:for-each select="document('data.xml')/glossary/entry" >
                        <xsl:sort select="@term" data-type="text" order="ascending" case-order="upper-first"/> 
                        <xsl:variable name="initial" select="substring(@term,1,1)" />
                        <!-- Alphabetical header: how to show only the first instance of each letter?
                             The initial is taken verbatim, so "C" and "c" are distinct values here. -->
                        <a name="{$initial}"><h1><xsl:value-of select="$initial" /></h1></a> 
                        <dt><xsl:apply-templates select="@term"/></dt>
                        <dd><xsl:apply-templates select="@definition"/></dd>
                    </xsl:for-each>
                </dl> 
            </body>
        </html>
    </xsl:template>
</xsl:stylesheet>   

Output so far

a | a | b | c | C | c | o |

a
apple
   A red fruit with seeds

a
avocado
   A mellow fruit enjoyed in guacamole

b
banana
   A tropical yellow fruit

c
cantaloupe
   A kind of melon

C
Cherry
   A red fruit that grows in clusters

c
cranberry
   A sour berry enjoyed at Thanksgiving

o
orange
   An orange citrus fruit



Desired output

a | b | c | o

a
apple
   A red fruit with seeds

avocado
   A mellow fruit enjoyed in guacamole

b
banana
   A tropical yellow fruit

c
cantaloupe
   A kind of melon

Cherry
   A red fruit that grows in clusters

cranberry
   A sour berry enjoyed at Thanksgiving

o
orange
   An orange citrus fruit


Solution

  • This is an example of a grouping problem, and in XSLT 1.0 the established way to do grouping is Muenchian grouping. Unfortunately, your scenario also requires lower-casing the initial characters, and that's a bit messy in XSLT 1.0: there is no lower-case() function, so it has to be done with translate().

    Nonetheless, I've produced a solution and it goes as follows:

    <!--
      Glossary renderer (XSLT 1.0).
      Groups the entries of data.xml by the ASCII lower-cased first character of
      @term (Muenchian grouping through the kEntryInitial key), then writes an
      index of initials followed by a definition list with one heading per initial.
      Groups are ordered by their normalised initial and the entries inside each
      group by their lower-cased term, so the order does not rely on the order of
      entries in the source document or on upper/lower-case differences.
    -->
    <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
      <xsl:output method="html" doctype-system="about:legacy-compat" 
                  encoding="UTF-8" indent="yes" />
    
      <!-- Alphabets used with translate() to lower-case ASCII letters. -->
      <xsl:variable name="upper" select="'ABCDEFGHIJKLMNOPQRSTUVWXYZ'" />
      <xsl:variable name="lower" select="'abcdefghijklmnopqrstuvwxyz'" />
    
      <!-- Indexes every @term by its lower-cased initial. XSLT 1.0 forbids variable
           references in the attributes of xsl:key, so the alphabets are literal here. -->
      <xsl:key name="kEntryInitial" match="entry/@term"
               use="translate(substring(., 1, 1), 
                 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 
                 'abcdefghijklmnopqrstuvwxyz')"/>
    
      <xsl:template match="/">
        <html>
          <head></head>
          <body>
            <!-- Jump into the data.xml DOM so that keys work: key() only searches
                 the document that contains the context node -->
            <xsl:apply-templates select="document('data.xml')/glossary" />
          </body>
        </html>
      </xsl:template>
    
      <!-- Writes the index and the grouped definition list for one glossary. -->
      <xsl:template match="/glossary">
        <!-- One @term per distinct initial (case invariant): the first node the
             key returns for that initial -->
        <xsl:variable name="termsByDistinctInitial"
                      select="entry/@term[generate-id() = 
                                 generate-id(key('kEntryInitial', 
                                                translate(substring(., 1, 1), 
                                                          $upper, $lower))[1])]" />
    
        <!-- Header: sort on the normalised initial rather than the raw term, so a
             group whose first term is capitalised is not misplaced -->
        <xsl:apply-templates select="$termsByDistinctInitial" mode="header">
          <xsl:sort select="translate(substring(., 1, 1), $upper, $lower)"
                    data-type="text" order="ascending" />
        </xsl:apply-templates>
    
        <!-- Glossary -->
        <dl>
          <xsl:apply-templates select="$termsByDistinctInitial" mode="main">
            <xsl:sort select="translate(substring(., 1, 1), $upper, $lower)"
                      data-type="text" order="ascending" />
          </xsl:apply-templates>
        </dl>
      </xsl:template>
    
      <!-- Index link for one group; a separator follows every link but the last. -->
      <xsl:template match="@term" mode="header">
        <xsl:variable name="initial">
          <xsl:call-template name="ToLower">
            <xsl:with-param name="value" select="substring(., 1, 1)" />
          </xsl:call-template>
        </xsl:variable>
    
        <a href="#{$initial}">
          <xsl:value-of select="$initial" />
        </a>
        <xsl:if test="position() != last()">
          <xsl:text> |</xsl:text>
        </xsl:if>
      </xsl:template>
    
      <!-- Heading (and anchor target) for one group, followed by its entries. -->
      <xsl:template match="@term" mode="main">
        <xsl:variable name="initial">
          <xsl:call-template name="ToLower">
            <xsl:with-param name="value" select="substring(., 1, 1)" />
          </xsl:call-template>
        </xsl:variable>
        <a name="{$initial}">
          <h1>
            <xsl:value-of select="$initial" />
          </h1>
        </a>
    
        <!-- The key returns @term attributes; step up to their entry elements and
             sort them case-insensitively instead of relying on document order -->
        <xsl:apply-templates select="key('kEntryInitial', $initial)/..">
          <xsl:sort select="translate(@term, $upper, $lower)"
                    data-type="text" order="ascending" />
        </xsl:apply-templates>
      </xsl:template>
    
      <!-- One term/definition pair. -->
      <xsl:template match="entry">
        <dt>
          <xsl:apply-templates select="@term"/>
        </dt>
        <dd>
          <xsl:apply-templates select="@definition"/>
        </dd>
      </xsl:template>
    
      <!-- Returns the first character of $value with ASCII letters lower-cased. -->
      <xsl:template name="ToLower">
        <xsl:param name="value" />
        <xsl:value-of select="translate(substring($value, 1, 1), $upper, $lower)"/>
      </xsl:template>
    </xsl:stylesheet>
    

    When run on your input XML, this produces the following:

    <!DOCTYPE html SYSTEM "about:legacy-compat">
    <html>
      <head>
        <META http-equiv="Content-Type" content="text/html; charset=utf-8">
      </head>
      <body><a href="#a">a</a> |<a href="#b">b</a> |<a href="#c">c</a> |<a href="#o">o</a>
        <dl><a name="a"><h1>a</h1></a><dt>apple</dt>
          <dd>A red fruit with seeds</dd>
          <dt>avocado</dt>
          <dd>A mellow fruit enjoyed in guacamole</dd><a name="b"><h1>b</h1></a><dt>banana</dt>
          <dd>A tropical yellow fruit</dd><a name="c"><h1>c</h1></a><dt>cantaloupe</dt>
          <dd>A kind of melon</dd>
          <dt>Cherry</dt>
          <dd>A red fruit that grows in clusters </dd>
          <dt>cranberry</dt>
          <dd>A sour berry enjoyed at Thanksgiving</dd><a name="o"><h1>o</h1></a><dt>orange</dt>
          <dd>An orange citrus fruit</dd>
        </dl>
      </body>
    </html>
    

    One thing I'd suggest considering is using a simple XSLT to "prep" your glossary with initials:

    <!--
      Preparation pass: copies the glossary unchanged, except that every entry
      gains an "initial" attribute holding the first character of its term with
      ASCII letters lower-cased.
    -->
    <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
      <xsl:output method="xml" indent="yes"/>
    
      <!-- Identity rule: reproduce every attribute and node as-is. -->
      <xsl:template match="node() | @*">
        <xsl:copy>
          <xsl:apply-templates select="@*" />
          <xsl:apply-templates select="node()" />
        </xsl:copy>
      </xsl:template>
    
      <!-- Entries: write the computed initial first, then the original attributes
           and children. -->
      <xsl:template match="entry">
        <xsl:variable name="firstChar" select="substring(@term, 1, 1)" />
        <xsl:copy>
          <xsl:attribute name="initial">
            <xsl:value-of select="translate($firstChar,
                                    'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                                    'abcdefghijklmnopqrstuvwxyz')"/>
          </xsl:attribute>
          <xsl:apply-templates select="@*" />
          <xsl:apply-templates select="node()" />
        </xsl:copy>
      </xsl:template>
    </xsl:stylesheet>
    

    This produces:

    <glossary>
      <entry initial="c" term="cantaloupe" definition="A kind of melon" />
      <entry initial="b" term="banana" definition="A tropical yellow fruit" />
      <entry initial="a" term="apple" definition="A red fruit with seeds" />
      <entry initial="o" term="orange" definition="An orange citrus fruit" />
      <entry initial="c" term="Cherry" definition="A red fruit that grows in clusters " />
      <entry initial="c" term="cranberry" definition="A sour berry enjoyed at Thanksgiving" />
      <entry initial="a" term="avocado" definition="A mellow fruit enjoyed in guacamole" />
    </glossary>
    

    Then, if you save this prepped version (as data2.xml, the name the stylesheet below loads) and use it as the glossary, the main XSLT no longer needs translate() for the grouping and becomes a lot cleaner:

    <!--
      Glossary renderer for the prepped glossary (data2.xml), in which every entry
      already carries a lower-cased "initial" attribute.
      Groups entries by @initial (Muenchian grouping through the kEntryInitial key),
      then writes an index of initials followed by a definition list with one
      heading per initial. Entries inside a group are sorted by term, so the result
      does not rely on the order of entries in the source document.
    -->
    <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
      <xsl:output method="html" doctype-system="about:legacy-compat" 
                  encoding="UTF-8" indent="yes" />
    
      <!-- Indexes every @initial attribute by its own value. -->
      <xsl:key name="kEntryInitial" match="entry/@initial" use="."/>
    
      <xsl:template match="/">
        <html>
          <head></head>
          <body>
            <!-- Jump into the data2.xml DOM so that keys work: key() only searches
                 the document that contains the context node -->
            <xsl:apply-templates select="document('data2.xml')/glossary" />
          </body>
        </html>
      </xsl:template>
    
      <!-- Writes the index and the grouped definition list for one glossary. -->
      <xsl:template match="/glossary">
        <!-- One @initial per distinct value: the first node the key returns for it -->
        <xsl:variable name="termsByDistinctInitial"
                      select="entry/@initial[generate-id() = 
                                 generate-id(key('kEntryInitial', .)[1])]" />
    
        <!-- Header -->
        <xsl:apply-templates select="$termsByDistinctInitial" mode="header">
          <xsl:sort select="." data-type="text" order="ascending" />
        </xsl:apply-templates>
    
        <!-- Glossary -->
        <dl>
          <xsl:apply-templates select="$termsByDistinctInitial" mode="main">
            <xsl:sort select="." data-type="text" order="ascending" />
          </xsl:apply-templates>
        </dl>
      </xsl:template>
    
      <!-- Index link for one group; a separator follows every link but the last. -->
      <xsl:template match="@initial" mode="header">
        <a href="#{.}">
          <xsl:value-of select="." />
        </a>
        <xsl:if test="position() != last()">
          <xsl:text> |</xsl:text>
        </xsl:if>
      </xsl:template>
    
      <!-- Heading (and anchor target) for one group, followed by its entries. -->
      <xsl:template match="@initial" mode="main">
        <a name="{.}">
          <h1>
            <xsl:value-of select="." />
          </h1>
        </a>
    
        <!-- The key returns @initial attributes; step up to their entry elements.
             Sort on the lower-cased term so the order inside a group is
             alphabetical regardless of document order or letter case -->
        <xsl:apply-templates select="key('kEntryInitial', .)/..">
          <xsl:sort select="translate(@term,
                              'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                              'abcdefghijklmnopqrstuvwxyz')"
                    data-type="text" order="ascending" />
        </xsl:apply-templates>
      </xsl:template>
    
      <!-- One term/definition pair. -->
      <xsl:template match="entry">
        <dt>
          <xsl:apply-templates select="@term"/>
        </dt>
        <dd>
          <xsl:apply-templates select="@definition"/>
        </dd>
      </xsl:template>
    </xsl:stylesheet>
    

    Of course, the final output is the same as the first example. If your XSLT processor supports a node-set() extension function (for example EXSLT's exsl:node-set() or Microsoft's msxsl:node-set()), it's also possible to do both of these processing steps in a single XSLT.