Search code examples
javams-wordapache-poitext-direction

RTL direction with punctuation not working well in the Apache POI Word (XWPF) packages


I have struggled with this for a long time and have not found a solution yet.

I have some text that I'm injecting into a Word file using the Apache POI XWPF package.

It's in Hebrew, an RTL language, so punctuation such as a period or a comma should appear on the left side of the line, not on the right.

I tried several solutions that explain how to style the text this way, but they did not fix the character order; they only aligned the text to the right. See the image below.

(I have used STOnOff1 according to this answer)

My POC code:

package yimprogramming.lotem.wordtoword.test;

import java.io.FileOutputStream;

import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.openxmlformats.schemas.officeDocument.x2006.sharedTypes.STOnOff1;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr;

/**
 * Proof of concept that reproduces the RTL punctuation problem: it writes a
 * three-paragraph .docx whose middle paragraph holds Hebrew text and has the
 * paragraph-level bidi property switched on.
 */
public class RtlPoiProblem {

    public static void main(String[] args) {
        System.out.println("Show rtl problem: ");
        demoRtl1();
    }

    /**
     * Builds the demo document and writes it to {@code WordDocument.docx} in the
     * working directory.
     *
     * <p>I/O failures are reported on stderr and not propagated. The document and
     * the output stream are closed on every path, including when writing fails.
     */
    public static void demoRtl1() {
        try (XWPFDocument doc = new XWPFDocument()) {

            // Paragraph 1: plain left-to-right text.
            XWPFParagraph paragraph = doc.createParagraph();
            XWPFRun run = paragraph.createRun();
            run.setText("Paragraph 1 LTR");

            // Paragraph 2: Hebrew text with the bidi flag set in the paragraph properties.
            paragraph = doc.createParagraph();
            CTP ctp = paragraph.getCTP();
            CTPPr ctppr = ctp.getPPr();
            if (ctppr == null) {
                // A freshly created paragraph may have no properties element yet.
                ctppr = ctp.addNewPPr();
            }
            ctppr.addNewBidi().setVal(STOnOff1.ON);

            run = paragraph.createRun();
            // Arabic sample the author left disabled: run.setText("السلام عليكم");
            run.setText("נקודה אחרי.");

            // Paragraph 3: plain left-to-right text again.
            paragraph = doc.createParagraph();
            run = paragraph.createRun();
            run.setText("Paragraph 3 LTR");

            // Open the file only after the document is fully built; the stream is
            // closed before the document, matching the original close order.
            try (FileOutputStream out = new FileOutputStream("WordDocument.docx")) {
                doc.write(out);
            }
        } catch (java.io.IOException e) {
            // Best-effort demo: report the failure instead of aborting the caller.
            e.printStackTrace();
        }
    }
}

Result:

problem

My pom.xml:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Build descriptor for the word-to-word project; inherits from spring-boot-starter-parent 3.2.3. -->
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>3.2.3</version>
        <relativePath /> <!-- lookup parent from repository -->
    </parent>
    <groupId>yimprogramming.word2word</groupId>
    <artifactId>word-to-word</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>Word Files Generator</name>
    <description>Program that creates New description word files from exist data . </description>
    <properties>
        <!-- NOTE(review): this is set to 8, while maven-compiler-plugin below is configured with source/target 11; confirm which Java level is intended. -->
        <java.version>8</java.version>
    </properties>
    <dependencies>

        <!-- Lombok with provided scope; the explicit version is commented out, so it presumably resolves through the parent POM (verify). -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <!--<version>1.18.30</version> -->
            <scope>provided</scope>
        </dependency>

        <!-- For word parsing -->
        <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>5.2.5</version>
        </dependency>

    </dependencies>




    <build>
        <plugins>
            <!-- Compiler settings: Java 11 source/target, UTF-8 source encoding. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>11</source>
                    <target>11</target>
                    <encoding>UTF-8</encoding>


                </configuration>
            </plugin>
            <!-- Copies the project dependencies into ${project.build.directory}/libs during the prepare-package phase. -->
            <plugin>

                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <executions>
                    <execution>
                        <id>copy-dependencies</id>
                        <phase>prepare-package</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <outputDirectory>
                                ${project.build.directory}/libs
                            </outputDirectory>
                        </configuration>
                    </execution>
                </executions>
            </plugin>

            <!-- Jar manifest: adds a Class-Path with the libs/ prefix (the directory filled by copy-dependencies above) and declares the main class. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <configuration>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <classpathPrefix>libs/</classpathPrefix>
                            <!-- Here comes the main class -->
                            <mainClass>
                                yimprogramming.lotem.wordtoword.WordFilesGeneratorApplication
                            </mainClass>
                        </manifest>
                    </archive>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>

Solution

  • See bidirectional with word document using Apache POI.

    If HebrewTextFile.txt contains:

    שלום עולם!
    זה עובד טוב מאוד עכשיו.
    
    קורס פיתוח מנהלים דרג מתקדם -הכשרה מקצועית למנהלים בדרג ביניים במשרד האוצר (כ-20 משתתפים). פיתוח, תכנון והנחיה, תפעול וארגון פעילויות הדרכה, ליווי והנחיית המפגשים עצמם. בנוסף התקיימה עבודה מול ספקים.
    

    ...then the following code...

    import java.io.File;
    import java.io.FileOutputStream;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    
    import org.apache.poi.xwpf.usermodel.*;
    
    import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
    import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr;
    //import org.openxmlformats.schemas.wordprocessingml.x2006.main.STOnOff;
    
    import java.util.List;
    
    /**
     * Creates WordDocument.docx with one right-to-left paragraph per line of
     * HebrewTextFile.txt.
     */
    public class CreateWordRTLParagraphsFromFile {

     /**
      * Reads HebrewTextFile.txt (decoded as UTF-16) from the working directory and
      * writes each line as its own bidi paragraph to ./WordDocument.docx.
      *
      * @param args unused
      * @throws Exception if the input cannot be read or the document cannot be written
      */
     public static void main(String[] args) throws Exception {

      List<String> lines = Files.readAllLines(new File("HebrewTextFile.txt").toPath(), StandardCharsets.UTF_16);

      // try-with-resources closes the document even if building or writing it fails.
      try (XWPFDocument doc = new XWPFDocument()) {

       for (String line : lines) {

        XWPFParagraph paragraph = doc.createParagraph();
        CTP ctp = paragraph.getCTP();
        CTPPr ctppr = ctp.getPPr();
        if (ctppr == null) {
         ctppr = ctp.addNewPPr();
        }
        // Alternative form the author left disabled: ctppr.addNewBidi().setVal(STOnOff.ON);
        ctppr.addNewBidi().setVal(true);

        XWPFRun run = paragraph.createRun();
        // U+202E (RIGHT-TO-LEFT OVERRIDE) opens and U+202C (POP DIRECTIONAL FORMATTING)
        // closes an explicit direction override wrapped around the line's text.
        run.setText("\u202E" + line + "\u202C");

       }

       // Open the output only once the document is complete; the stream is closed
       // before the document, matching the original close order.
       try (FileOutputStream out = new FileOutputStream("./WordDocument.docx")) {
        doc.write(out);
       }
      }

     }
    }
    

    ...results in:

    enter image description here