Search code examples
javautf-8character-encodingthymeleaf

thymeleaf UTF8 xml bug?


I am doing a very simple replacement on an XML template below:

<?xml version="1.0" encoding="UTF-8"?>  
<note>  
    <to>
        ARABIC: [[${TEST_AR}]]
        HEBREW: [[${TEST_HE}]]  
        CHINESE (MANDARIN): [[${TEST_CH}]]
    </to>
</note>

But it seems like Thymeleaf has a UTF-8 XML bug, or I'm missing something.
Here is what I tried so far:

  • Template is coded in UTF-8
  • Java source code is saved in UTF-8
  • encoding for OutputStreamWriter is UTF-8
  • ClassLoaderTemplateResolver is set to UTF-8; Maven is set to UTF-8
  • project.build.sourceEncoding UTF-8
  • coded the XML as UTF-8

java encoding

It seems like Thymeleaf won't write UTF-8 text correctly to XML.
The code example below works flawlessly (except for Chinese; I'm not sure why, but it's not important at the moment) as long as I am opening a text template (only the file extension differs).

If I use this line, it works fine and outputs UTF-8 with no issues.

 templateEngine.process("test_template.txt", ct,out);

works great:

<?xml version="1.0" encoding="UTF-8"?>  
<note>  
    <to>
        ARABIC: كتابة مفهومة من قبل اغل
        HEBREW: ניסיון  
        CHINESE (MANDARIN): 
    </to>
</note>

Once I modify this line (and rename the template accordingly) to this:

templateEngine.process("test_template.xml", ct,out);

Thymeleaf no longer writes the Unicode characters as-is and instead exports them as hexadecimal character references.

<?xml version="1.0" encoding="UTF-8"?> 
<note>  
    <to>
        ARABIC: &#x643;&#x62a;&#x627;&#x628;&#x629; &#x645;&#x641;&#x647;&#x648;&#x645;&#x629; &#x645;&#x646; &#x642;&#x628;&#x644; &#x627;&#x63a;&#x644;
        HEBREW: &#x5e0;&#x5d9;&#x5e1;&#x5d9;&#x5d5;&#x5df;  
        CHINESE (MANDARIN): 
    </to>
</note>

Here is a full, isolated working example; just create the template (test_template.txt) and put it under src/main/resources.

package com.xerox;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.time.LocalDateTime;

import org.thymeleaf.TemplateEngine;
import org.thymeleaf.context.Context;
import org.thymeleaf.templatemode.TemplateMode;
import org.thymeleaf.templateresolver.ClassLoaderTemplateResolver;

/**
 * Minimal reproduction: resolves {@code test_template.txt} through a
 * {@link ClassLoaderTemplateResolver}, fills it with Arabic, Hebrew and Chinese
 * sample strings, and writes the result to {@code test_output.txt} as UTF-8.
 */
public class TestThymeleafUTF8 {
    /**
     * Configures the engine, populates the context and processes the template.
     *
     * @param args not used
     */
    public static void main(String[] args) {
        try {
               TemplateEngine templateEngine = new TemplateEngine();
                ClassLoaderTemplateResolver resolver = new ClassLoaderTemplateResolver();
                // Encoding used by the resolver when reading the template.
                resolver.setCharacterEncoding("UTF-8");     
                // Requested template mode; forceTemplateMode is not set here.
                resolver.setTemplateMode(TemplateMode.TEXT);
                templateEngine.setTemplateResolver(resolver);
                Context ct = new Context();             
                ct.setVariable("TEST_AR", "كتابة مفهومة من قبل اغل");
                ct.setVariable("TEST_HE", "ניסיון");
                // NOTE(review): the template placeholder is ${TEST_CH}, but this key is
                // "TEST_CN", so the Chinese placeholder has no matching variable.
                ct.setVariable("TEST_CN", "王明是中国人。");
                ct.setVariable("currentDate", LocalDateTime.now().toString());
                // NOTE(review): this writer is never closed in this method.
                BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("test_output.txt"), StandardCharsets.UTF_8));
            
                templateEngine.process("test_template.txt", ct,out);
        } catch (Exception e) {
            // Only the exception's string form is printed; no stack trace.
            System.out.println(e);
        }
    }
}

Pom.xml:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.xerox</groupId>
  <artifactId>testUTF</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <name>testUTF</name>
  <description>thymeleaf testUTF</description>
  
    <properties>
            <maven.compiler.source>1.8</maven.compiler.source>
            <maven.compiler.target>1.8</maven.compiler.target>
            <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>   
  <dependencies>
    <dependency>
        <groupId>org.thymeleaf</groupId>
        <artifactId>thymeleaf</artifactId>
        <version>3.1.0.RELEASE</version>
    </dependency>

  </dependencies>
</project>

Solution

  • It is not a bug, but the programmed behavior.

    The setTemplateMode documentation indicates:

    Sets the template mode to be applied to templates resolved by this resolver.

    If template mode patterns (see setHtmlTemplateModePatterns(Set), setXmlTemplateModePatterns(Set), etc.) are also set, they have higher priority than the template mode set here (this would act as a default).

    Note that this template mode also may not be applied if the template resource name ends in a known file name suffix: .html, .htm, .xhtml, .xml, .js, .json, .css, .rss, .atom, .txt. If this behaviour needs to be overridden so that template name is always applied, the setForceTemplateMode(boolean) will need to be set.

    Pay attention to the last note: for a well-known file name suffix, the configured template mode will be overridden. That explains the behavior you are facing: when you use test_template.xml, Thymeleaf no longer uses TemplateMode.TEXT as configured but TemplateMode.XML instead, and the text is then escaped.

    As advised in the aforementioned Javadoc, you can force Thymeleaf to obey the configured TemplateMode, and get your desired behavior, by using setForceTemplateMode:

    package com.xerox;
    
    import java.io.BufferedWriter;
    import java.io.FileOutputStream;
    import java.io.OutputStreamWriter;
    import java.nio.charset.StandardCharsets;
    import java.time.LocalDateTime;
    
    import org.thymeleaf.TemplateEngine;
    import org.thymeleaf.context.Context;
    import org.thymeleaf.templatemode.TemplateMode;
    import org.thymeleaf.templateresolver.ClassLoaderTemplateResolver;
    
    /**
     * Renders the classpath template {@code test_template.xml} with Arabic, Hebrew
     * and Chinese sample values and writes the result to {@code test_output.xml}
     * encoded as UTF-8.
     *
     * <p>The resolver is configured with {@link TemplateMode#TEXT} and
     * {@code setForceTemplateMode(true)}, so the configured mode is used even
     * though the template name ends in {@code .xml}. Without forcing, the
     * {@code .xml} suffix selects XML mode, in which the non-ASCII text is written
     * as hexadecimal character references.
     */
    public class TestThymeleafUTF8 {
      /**
       * Configures the engine, populates the context and processes the template.
       *
       * @param args not used
       */
      public static void main(String[] args) {
        // try-with-resources guarantees the writer (and the underlying file stream)
        // is flushed and closed, whether processing succeeds or throws.
        try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter(
            new FileOutputStream("test_output.xml"), StandardCharsets.UTF_8))) {
          TemplateEngine templateEngine = new TemplateEngine();
          ClassLoaderTemplateResolver resolver = new ClassLoaderTemplateResolver();
          resolver.setCharacterEncoding("UTF-8");
          resolver.setTemplateMode(TemplateMode.TEXT); // TemplateMode.HTML should work as well
          // Make the mode above win over the mode implied by the ".xml" suffix.
          resolver.setForceTemplateMode(true);
          templateEngine.setTemplateResolver(resolver);

          Context ct = new Context();
          ct.setVariable("TEST_AR", "كتابة مفهومة من قبل اغل");
          ct.setVariable("TEST_HE", "ניסיון");
          // Key matches the ${TEST_CH} placeholder used by the template.
          ct.setVariable("TEST_CH", "王明是中国人。");
          ct.setVariable("currentDate", LocalDateTime.now().toString());

          templateEngine.process("test_template.xml", ct, out);
        } catch (Exception e) {
          // Top-level boundary of the sample: report the failure and exit normally.
          System.out.println(e);
        }
      }
    }