Search code examples
java ms-word apache-poi docx xwpf

Read sections from Word documents with Apache POI


I have a Word document (.docx) that has been created based on a template. It has 4 sections: title, document type, identification, and signature.

I've been looking for a way to read the sections using Apache POI's XWPF. Any advice?


Solution

  • I have found the solution. As a comment above suggested, what I needed was to go through the document and collect the section blocks of the Word document.

    I used the information found on the following page:

    APACHE SVN TestXWPFSDT

    What it does is walk through the document, collecting all of the sections.

    The code used in my project was as follows:

    /**
     * Demo program that prints the tag and text of every structured document tag (SDT)
     * found in a .docx file, using Apache POI's XWPF API.
     *
     * <p>Headers, the document body, footers and footnotes are searched, in that order.
     * SDTs are collected where they appear as body elements, as run-level elements of a
     * paragraph, or as table cells; the content of a collected SDT is not searched again
     * for nested SDTs.
     */
    public class decodeWord {
    
        /**
         * Opens the hard-coded test document and prints one line per SDT in the form
         * {@code <tag> <text>}. Any failure is reported by printing its stack trace.
         *
         * @param args unused
         */
        public static void main(String[] args) {
    
            FileInputStream fis = null;
    
            try {
    
                fis = new FileInputStream("/WORKSPACE/TestDoc.docx");
                XWPFDocument xdoc = new XWPFDocument(OPCPackage.open(fis));
    
                for (AbstractXWPFSDT sdt : extractAllSDTs(xdoc)) {
                    System.out.println(sdt.getTag() + " " + sdt.getContent().getText());
                }
    
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                // Release the file handle whether or not reading succeeded; the original
                // code left the stream open.
                if (fis != null) {
                    try {
                        fis.close();
                    } catch (java.io.IOException closeFailure) {
                        closeFailure.printStackTrace();
                    }
                }
            }
    
        }
    
        /**
         * Collects the SDTs of a whole document.
         *
         * @param doc the document to search
         * @return SDTs from all headers, then the body, then all footers, then all footnotes
         */
        private static List<AbstractXWPFSDT> extractAllSDTs(XWPFDocument doc) {
    
            List<AbstractXWPFSDT> sdts = new ArrayList<AbstractXWPFSDT>();
    
            for (XWPFHeader header : doc.getHeaderList()) {
                sdts.addAll(extractSDTsFromBodyElements(header.getBodyElements()));
            }
    
            sdts.addAll(extractSDTsFromBodyElements(doc.getBodyElements()));
    
            for (XWPFFooter footer : doc.getFooterList()) {
                sdts.addAll(extractSDTsFromBodyElements(footer.getBodyElements()));
            }
    
            for (XWPFFootnote footnote : doc.getFootnotes()) {
                sdts.addAll(extractSDTsFromBodyElements(footnote.getBodyElements()));
            }
            return sdts;
        }
    
        /**
         * Collects the SDTs reachable from a list of body elements.
         *
         * @param elements body elements of a document part (body, header, footer, footnote or
         *                 table cell)
         * @return the SDTs found, in encounter order
         */
        private static List<AbstractXWPFSDT> extractSDTsFromBodyElements(List<IBodyElement> elements) {
            List<AbstractXWPFSDT> sdts = new ArrayList<AbstractXWPFSDT>();
            for (IBodyElement e : elements) {
                if (e instanceof XWPFSDT) {
                    // Block-level SDT sitting directly among the body elements.
                    sdts.add((XWPFSDT) e);
                } else if (e instanceof XWPFParagraph) {
                    // Run-level SDTs appear among the paragraph's run elements.
                    for (IRunElement run : ((XWPFParagraph) e).getIRuns()) {
                        if (run instanceof XWPFSDT) {
                            sdts.add((XWPFSDT) run);
                        }
                    }
                } else if (e instanceof XWPFTable) {
                    sdts.addAll(extractSDTsFromTable((XWPFTable) e));
                }
            }
            return sdts;
        }
    
        /**
         * Collects the SDTs of a table: cells that are themselves SDTs, plus any SDTs found
         * in the body elements of ordinary cells (which covers nested tables as well).
         *
         * @param table the table to search
         * @return the SDTs found, row by row and cell by cell
         */
        private static List<AbstractXWPFSDT> extractSDTsFromTable(XWPFTable table) {
    
            List<AbstractXWPFSDT> sdts = new ArrayList<AbstractXWPFSDT>();
            for (XWPFTableRow r : table.getRows()) {
                for (ICell c : r.getTableICells()) {
                    if (c instanceof XWPFSDTCell) {
                        sdts.add((XWPFSDTCell) c);
                    } else if (c instanceof XWPFTableCell) {
                        sdts.addAll(extractSDTsFromBodyElements(((XWPFTableCell) c).getBodyElements()));
                    }
                }
            }
            return sdts;
        }
    
    }