Search code examples
javadocx4jhtml-to-text

Tables and other HTML tags are written to the docx as plain text instead of being rendered when using docx4j-ImportXHTML


I want to render HTML into a docx file. Instead of rendering the HTML (e.g. showing tables in tabular form), the output file simply contains the HTML source as plain text. I am using the docx4j-ImportXHTML jar. I used the code from here and modified it to save to a file.

What am I doing wrong?

/**
 * Imports the given XHTML into a freshly created WordprocessingMLPackage, prints the
 * main document part as XML to standard output, and then exports the package back to
 * HTML, writing that export to the file {@code fileName} inside {@code destinationPath}.
 *
 * NOTE(review): the destination file is only ever written through the stream handed to
 * Docx4J.toHTML(...), so it receives the HTML export of the package. The package itself
 * is never saved (no wordMLPackage.save(...) call appears in this method).
 *
 * All caught exceptions are printed with printStackTrace() and not rethrown.
 *
 * @param xhtml           the markup passed to XHTMLImporter.convert(...)
 * @param destinationPath directory in which the output file is created
 * @param fileName        name of the output file inside destinationPath
 */
public static void xhtmlToDocx(String xhtml, String destinationPath, String fileName)
    {
        File dir = new File (destinationPath);
        File actualFile = new File (dir, fileName);

        WordprocessingMLPackage wordMLPackage = null;
        try
        {
            wordMLPackage = WordprocessingMLPackage.createPackage();
        }
        catch (InvalidFormatException e)
        {

            e.printStackTrace();
        }

        // NOTE(review): if createPackage() threw above, wordMLPackage is still null here
        // and is used regardless.
        XHTMLImporterImpl XHTMLImporter = new XHTMLImporterImpl(wordMLPackage);
        //XHTMLImporter.setDivHandler(new DivToSdt());
        //OutputStream os = null;
        OutputStream fos = null;
        try
        {
            // Opens the destination file itself; everything written to fos ends up in it.
            fos = new FileOutputStream(actualFile);
            // Convert the XHTML and append the resulting objects to the document body.
            wordMLPackage.getMainDocumentPart().getContent().addAll( 
                    XHTMLImporter.convert( xhtml, null) );

            // Debug output: dump the main document part as XML to stdout.
            System.out.println(XmlUtils.marshaltoString(wordMLPackage
                    .getMainDocumentPart().getJaxbElement(), true, true));
            // Back to XHTML

            HTMLSettings htmlSettings = Docx4J.createHTMLSettings();
            htmlSettings.setWmlPackage(wordMLPackage);


            // output to an OutputStream.
            //os = new ByteArrayOutputStream();

            // If you want XHTML output
            Docx4jProperties.setProperty("docx4j.Convert.Out.HTML.OutputMethodXML",
                    true);
            // The HTML export is written to fos, i.e. straight into the destination file.
            Docx4J.toHTML(htmlSettings, fos, Docx4J.FLAG_EXPORT_PREFER_XSL);
        }
        catch (Docx4JException | FileNotFoundException e)
        {
            // Failures are only reported on stderr; the caller is not informed.
            e.printStackTrace();
        }
        finally{
            try {
                // NOTE(review): fos is still null if the FileOutputStream constructor
                // threw, in which case this call raises a NullPointerException.
                fos.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

Solution

  • I corrected my code as below:

    1. Use a ByteArrayOutputStream instead of a FileOutputStream, i.e.

    Instead of

    fos = new FileOutputStream(actualFile);
                wordMLPackage.getMainDocumentPart().getContent().addAll( 
                        XHTMLImporter.convert( xhtml, null) );
    

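    Use (only the construction of the stream changes; the XHTML still has to be imported with `XHTMLImporter.convert(...)`):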

    fos = new ByteArrayOutputStream();
    
    2. Add wordMLPackage.save(actualFile), so that the package itself is written to the destination file as a docx.

    Full code:

    /**
     * Imports an XHTML string into a new Word document and saves it as a docx file.
     *
     * <p>The markup is converted with {@code XHTMLImporterImpl} and appended to the main
     * document part, so tables and other elements become real document content. The
     * package is then written to {@code fileName} inside {@code destinationPath} with
     * {@code WordprocessingMLPackage.save(File)}.
     *
     * <p>As a diagnostic, the main document part is printed to standard output and the
     * package is exported back to HTML into an in-memory buffer. That buffer is never
     * read; it only keeps the HTML export away from the destination file.
     *
     * <p>Failures are printed with {@code printStackTrace()} and not rethrown. If the
     * package cannot be created or the import fails, no file is written.
     *
     * @param xhtml           well-formed XHTML markup to import
     * @param destinationPath directory that receives the document
     * @param fileName        name of the docx file to create inside destinationPath
     */
    public static void xhtmlToDocx1(String xhtml, String destinationPath, String fileName)
    {
        File actualFile = new File(new File(destinationPath), fileName);

        // A single try block: once any step fails there is nothing meaningful left to do,
        // so execution never continues with a null or half-built package.
        try (OutputStream htmlSink = new ByteArrayOutputStream())
        {
            WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.createPackage();

            // This is the actual import step; without it the saved document is empty.
            XHTMLImporterImpl importer = new XHTMLImporterImpl(wordMLPackage);
            wordMLPackage.getMainDocumentPart().getContent().addAll(
                    importer.convert(xhtml, null));

            // Debug output: the document XML produced by the import.
            System.out.println(XmlUtils.marshaltoString(wordMLPackage
                    .getMainDocumentPart().getJaxbElement(), true, true));

            // Round trip back to XHTML, written to the in-memory sink only.
            HTMLSettings htmlSettings = Docx4J.createHTMLSettings();
            htmlSettings.setWmlPackage(wordMLPackage);
            Docx4jProperties.setProperty("docx4j.Convert.Out.HTML.OutputMethodXML",
                    true);
            Docx4J.toHTML(htmlSettings, htmlSink, Docx4J.FLAG_EXPORT_PREFER_XSL);

            // Write the docx itself to the destination file.
            wordMLPackage.save(actualFile);
        }
        catch (Docx4JException | IOException e)
        {
            // Best-effort behaviour of the original is kept: report and return.
            e.printStackTrace();
        }
    }