Search code examples
java xslt-1.0 apache-fop

Apache FOP XSLT: Highlight keywords


I want to highlight certain words in a PDF generated with Apache FOP. Because both the words to highlight and the input text are dynamic, I thought that calling a Java extension function that returns an XSL-FO inline element would be the easiest way:

<!-- Calls the ext:highlight extension function on the context node and emits
     one fo:block for each item of the result. -->
<xsl:variable name="lines" select="ext:highlight(.)"/>
<xsl:for-each select="$lines">
  <!-- NOTE(review): disable-output-escaping only affects how a text node is
       serialized. If the result tree is handed to the formatter without being
       serialized first (presumably the case here; confirm), the flag has no
       effect and the returned markup stays plain text. -->
  <fo:block><xsl:value-of disable-output-escaping="yes" select="."/></fo:block>
</xsl:for-each>
/**
 * Wraps every occurrence of each configured keyword in an
 * {@code <fo:inline background-color="yellow">} element.
 *
 * <p>Keywords are matched literally (not as regular expressions) and the
 * matched keyword itself is placed inside the inline element. Null or empty
 * keywords are skipped, because an empty search string matches between every
 * character.
 *
 * <p>NOTE(review): the result is still a plain {@code String}. An XSLT
 * processor treats it as text, so the markup only takes effect if the caller
 * turns it into nodes. Keywords that also occur in the inserted markup
 * (e.g. "yellow") would be matched again by later iterations.
 *
 * @param input the text to search
 * @return the text with every keyword occurrence wrapped in fo:inline markup
 */
public String highlight(String input) {
  for (String toHighlight : wordsToHighlight) {
    if (toHighlight == null || toHighlight.isEmpty()) {
      continue;
    }
    // String.replace matches the keyword literally, unlike replaceAll.
    input = input.replace(toHighlight,
        "<fo:inline background-color=\"yellow\">" + toHighlight + "</fo:inline>");
  }
  return input;
}

Unfortunately, the returned inline element is put literally in the pdf. What am I missing?

Also, I've looked into XSLT XML: highlight a search word in search results, but it becomes too complicated in my case.

Minimal reproducible example:

/**
 * Input model for the minimal reproducible example: the text to render and the
 * keywords that should be highlighted in it.
 *
 * <p>NOTE(review): {@code @Data} is presumably Lombok's annotation that
 * generates the accessors; confirm against the project's imports.
 */
@Data
public class Example {

    // Words that should be highlighted wherever they occur in inputText.
    private Set<String> keywords = Set.of("fox", "brown");
    // Text that is searched for the keywords.
    private String inputText = "The quick brown fox jumped over the fence.";

}
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:exsl="http://exslt.org/common"
                xmlns:fo="http://www.w3.org/1999/XSL/Format">

    <!-- Root template: builds the XSL-FO document skeleton (one page master and
         one page sequence) and renders the "example" element into the body
         region. -->
    <xsl:template match="/">
        <fo:root>
            <fo:layout-master-set>
                <!-- NOTE(review): no page-height/page-width is set although the
                     master is named "simpleA4"; confirm the intended page size. -->
                <fo:simple-page-master master-name="simpleA4">
                    <fo:region-body />
                </fo:simple-page-master>
            </fo:layout-master-set>

            <fo:page-sequence master-reference="simpleA4">
                <fo:flow flow-name="xsl-region-body">
                    <xsl:apply-templates select="example" />
                </fo:flow>
            </fo:page-sequence>
        </fo:root>
    </xsl:template>

    <!-- Renders an "example" element by processing only its inputText children;
         all other children are ignored. -->
    <xsl:template match="example">
        <xsl:apply-templates select="inputText" />
    </xsl:template>

    <!-- Hard-coded list of keywords to highlight, one "entry" element per
         keyword. In XSLT 1.0 this variable holds a result tree fragment, so it
         must be converted with exsl:node-set() before its entries can be
         selected. -->
    <xsl:variable name="dictionary">
        <entry keyword="fox" />
        <entry keyword="brown" />
    </xsl:variable>

    <!-- Renders an inputText element as a single fo:block, passing its text and
         the dictionary entries to the multi-hilite template. -->
    <xsl:template match="inputText">
        <fo:block>
            <xsl:call-template name="multi-hilite">
                <xsl:with-param name="string" select="." />
                <!-- exsl:node-set() turns the result tree fragment into a node-set
                     so that its entry children can be selected. -->
                <xsl:with-param name="entries" select="exsl:node-set($dictionary)/entry" />
            </xsl:call-template>
        </fo:block>
    </xsl:template>

    <!--
        multi-hilite: writes $string to the output and wraps every occurrence of
        each keyword in a highlighted fo:inline.

        Parameters:
          string  - the text to process (a string, or a node whose string value is used)
          entries - node-set of elements carrying the keyword in a "keyword" attribute

        The text is split around the first occurrence of the first keyword. The
        part before it is processed with the remaining keywords only, the match
        is emitted as fo:inline, and the part after it is processed with the full
        keyword list again. Elements are therefore only ever written to the
        output and never passed on as a parameter, where reading them as a
        string would discard the markup.

        Entries with an empty or missing keyword are skipped, because
        contains($string, '') is always true and would recurse without end.
    -->
    <xsl:template name="multi-hilite">
        <xsl:param name="string"/>
        <xsl:param name="entries"/>
        <xsl:variable name="keyword" select="string($entries[1]/@keyword)"/>
        <xsl:choose>
            <!-- no keywords left: emit the text unchanged -->
            <xsl:when test="not($entries)">
                <xsl:value-of select="$string"/>
            </xsl:when>
            <xsl:when test="$keyword != '' and contains($string, $keyword)">
                <!-- text before the match cannot contain this keyword any more -->
                <xsl:call-template name="multi-hilite">
                    <xsl:with-param name="string" select="substring-before($string, $keyword)"/>
                    <xsl:with-param name="entries" select="$entries[position() > 1]"/>
                </xsl:call-template>
                <!-- the matched keyword -->
                <fo:inline background-color="#eeee00">
                    <xsl:value-of select="$keyword"/>
                </fo:inline>
                <!-- text after the match may contain any keyword, including this one -->
                <xsl:call-template name="multi-hilite">
                    <xsl:with-param name="string" select="substring-after($string, $keyword)"/>
                    <xsl:with-param name="entries" select="$entries"/>
                </xsl:call-template>
            </xsl:when>
            <xsl:otherwise>
                <!-- keyword absent or empty: try the remaining keywords on the whole text -->
                <xsl:call-template name="multi-hilite">
                    <xsl:with-param name="string" select="$string"/>
                    <xsl:with-param name="entries" select="$entries[position() > 1]"/>
                </xsl:call-template>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>

    <!--
        hilite: writes $text to the output and wraps every occurrence of
        $search-string in a highlighted fo:inline.

        Parameters:
          text          - the text to process
          search-string - the literal, case-sensitive string to highlight

        An empty search string leaves the text unchanged. Without that guard,
        contains($text, '') is true and substring-after($text, '') returns $text
        again, so the recursion would never terminate.
    -->
    <xsl:template name="hilite">
        <xsl:param name="text"/>
        <xsl:param name="search-string"/>
        <xsl:choose>
            <xsl:when test="string($search-string) != '' and contains($text, $search-string)">
                <!-- text before the first match -->
                <xsl:value-of select="substring-before($text, $search-string)"/>
                <!-- the match itself -->
                <fo:inline background-color="#eeee00">
                    <xsl:value-of select="$search-string"/>
                </fo:inline>
                <!-- continue with the text after the match -->
                <xsl:call-template name="hilite">
                    <xsl:with-param name="text" select="substring-after($text, $search-string)"/>
                    <xsl:with-param name="search-string" select="$search-string"/>
                </xsl:call-template>
            </xsl:when>
            <xsl:otherwise>
                <xsl:value-of select="$text"/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>

</xsl:stylesheet>

Solution

  • Consider the following simplified example:

    XML

    <example>
        <keywords>
            <keyword>brown</keyword>
            <keyword>fox</keyword>
        </keywords>
        <inputText>The quick brown fox jumped over the fence.</inputText>
    </example>
    

    XSLT 1.0

    <xsl:stylesheet version="1.0" 
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
    <xsl:strip-space elements="*"/>
    
    <!-- Entry point: wraps the highlighted text in an "output" element, using
         the inputText child as the text and the keywords/keyword elements as
         the keyword list. -->
    <xsl:template match="/example">
        <output>
            <xsl:call-template name="hilite-keywords">
                <xsl:with-param name="string" select="inputText"/>
                <xsl:with-param name="keywords" select="keywords/keyword"/>
            </xsl:call-template>
        </output>
    </xsl:template>
    
    <!--
        hilite-keywords: writes $string to the output and wraps every occurrence
        of each keyword in a "hilite" element.

        Parameters:
          string   - the text to process
          keywords - node-set of elements whose string value is a keyword

        The text is split around the first occurrence of the first keyword. The
        part before it is processed with the remaining keywords, and the part
        after it with the full keyword list. Matching is literal and
        case-sensitive.

        Empty keywords are skipped, because contains($string, '') is always true
        and substring-after($string, '') returns $string, which would recurse
        without end.
    -->
    <xsl:template name="hilite-keywords">
        <xsl:param name="string"/>
        <xsl:param name="keywords"/>
        <xsl:choose>
            <xsl:when test="$keywords">
                <xsl:variable name="keyword" select="$keywords[1]" />
                <xsl:choose>
                    <xsl:when test="string($keyword) != '' and contains($string, $keyword)">
                        <!-- process substring-before with the remaining keywords -->
                        <xsl:call-template name="hilite-keywords">
                            <xsl:with-param name="string" select="substring-before($string, $keyword)"/>
                            <xsl:with-param name="keywords" select="$keywords[position() > 1]"/>
                        </xsl:call-template>
                        <!-- matched keyword -->
                        <hilite>
                            <xsl:value-of select="$keyword"/>
                        </hilite>
                        <!-- continue with substring-after -->
                        <xsl:call-template name="hilite-keywords">
                            <xsl:with-param name="string" select="substring-after($string, $keyword)"/>
                            <xsl:with-param name="keywords" select="$keywords"/>
                        </xsl:call-template>
                    </xsl:when>
                    <xsl:otherwise>
                        <!-- keyword absent or empty: pass the entire string on to the remaining keywords -->
                        <xsl:call-template name="hilite-keywords">
                            <xsl:with-param name="string" select="$string"/>
                            <xsl:with-param name="keywords" select="$keywords[position() > 1]"/>
                        </xsl:call-template>
                    </xsl:otherwise>
                </xsl:choose>
            </xsl:when>
            <xsl:otherwise>
                <!-- no keywords left: emit the text unchanged -->
                <xsl:value-of select="$string"/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
    
    </xsl:stylesheet>
    

    Result

    <?xml version="1.0" encoding="UTF-8"?>
    <output>The quick <hilite>brown</hilite> <hilite>fox</hilite> jumped over the fence.</output>