I am new to docx4j and need help splitting a .docx file into multiple output files, based on a marker string, using docx4j in Java.
I tried to do the same using Apache POI and got the output files; however, when I tried to convert them to HTML, the styles were missing. I added the styles afterwards but am still facing the same issue.
Below is the code using Apache POI:
// Index of the paragraph or table most recently appended to a destination
// document; main() recomputes it before every setParagraph/setTable call.
public static int pos = 0;
// Not referenced by any code visible in this listing.
public static int posc = 0;
// Split flag: "n" until main() meets a paragraph containing
// "CONSTRUCTION DETAILS:", then "y" for the rest of the run.
public static String ind = "n";
// Not referenced by any code visible in this listing.
final static int DEFAULT_FONT_SIZE = 10;
/**
 * Splits a .docx file into a "-Product" and a "-Component" document.
 *
 * <p>Every entry name found in the directory configured as {@code INPUT_DIR}
 * is printed, and the source/output paths are derived from it as
 * {@code INPUT_DIR + name + "/" + name + suffix}. Body elements of the source
 * are written to the product document until a paragraph containing
 * {@code "CONSTRUCTION DETAILS:"} is met; that paragraph and everything after
 * it go to the component document.
 *
 * @param args unused
 * @throws FileNotFoundException if the source or an output file cannot be opened
 * @throws IOException           if reading or writing a document fails
 * @throws XmlException          declared for the underlying XML handling
 */
public static void main(String[] args) throws FileNotFoundException,
        IOException, XmlException {
    File dir = new File(PropertyUtils.getProperty("INPUT_DIR"));
    String[] files = dir.list();
    // list() returns null when the path is not a readable directory; without
    // this guard the code below would dereference a null File.
    if (files == null || files.length == 0) {
        System.out.println("The directory is empty");
        return;
    }

    File file = null;
    File outfilep = null;
    File outfilec = null;
    // NOTE(review): only the LAST entry is actually processed, because the
    // split below runs once after this loop. The split was probably meant to
    // run per entry - confirm before relying on multi-entry directories.
    for (String aFile : files) {
        System.out.println(aFile);
        String base = PropertyUtils.getProperty("INPUT_DIR") + aFile + "/" + aFile;
        // Source file
        file = new File(base + ".docx");
        outfilep = new File(base + "-Product.docx");
        outfilec = new File(base + "-Component.docx");
    }

    // Close the input stream as soon as the document has been constructed from it.
    XWPFDocument doc;
    try (FileInputStream in = new FileInputStream(file)) {
        doc = new XWPFDocument(in);
    }

    XWPFDocument destDoc = new XWPFDocument();
    // NOTE(review): the page layout is copied to the product document only,
    // as in the original code - confirm whether the component document
    // should receive it as well.
    copyLayout(doc, destDoc);
    XWPFDocument destDocc = new XWPFDocument();

    for (IBodyElement bodyElement : doc.getBodyElements()) {
        BodyElementType elementType = bodyElement.getElementType();
        if (elementType == BodyElementType.PARAGRAPH) {
            XWPFParagraph pr = (XWPFParagraph) bodyElement;
            if (pr.getText().contains("CONSTRUCTION DETAILS:")) {
                ind = "y";
                System.out.println("ind is Y++++++++++++");
            }
            // Compare string content, not references.
            XWPFDocument target = "n".equals(ind) ? destDoc : destDocc;

            copyStyle(doc, target, doc.getStyles().getStyle(pr.getStyleID()));
            // Append a placeholder paragraph, then overwrite it with the source one.
            target.createParagraph().createRun();
            pos = target.getParagraphs().size() - 1;

            // Force zero spacing before/after and single (240) auto line spacing.
            CTPPr ppr = pr.getCTP().getPPr();
            if (ppr == null) {
                ppr = pr.getCTP().addNewPPr();
            }
            CTSpacing spacing = ppr.isSetSpacing() ? ppr.getSpacing() : ppr.addNewSpacing();
            spacing.setAfter(BigInteger.valueOf(0));
            spacing.setBefore(BigInteger.valueOf(0));
            spacing.setLineRule(STLineSpacingRule.AUTO);
            spacing.setLine(BigInteger.valueOf(240));

            target.setParagraph(pr, pos);
        } else if (elementType == BodyElementType.TABLE) {
            XWPFTable table = (XWPFTable) bodyElement;
            XWPFDocument target = "n".equals(ind) ? destDoc : destDocc;

            copyStyle(doc, target, doc.getStyles().getStyle(table.getStyleID()));
            // Append a placeholder table, then overwrite it with the source one.
            target.createTable();
            pos = target.getTables().size() - 1;
            target.setTable(pos, table);
        }
    }

    // Open the outputs only now, so a failure above leaves no empty files
    // behind, and close them even if write() throws.
    try (OutputStream out = new FileOutputStream(outfilep)) {
        destDoc.write(out);
    }
    try (OutputStream outc = new FileOutputStream(outfilec)) {
        destDocc.write(outc);
    }
}
/**
 * Copies a paragraph or table style from the source document into the
 * destination document.
 *
 * <p>All styles returned by {@code getUsedStyleList(style)} on the source
 * styles are added. A style whose ID is already present in the destination is
 * skipped: this method is called once per paragraph/table, and re-adding the
 * same styles each time would pile up duplicate definitions.
 *
 * @param srcDoc  document the style comes from
 * @param destDoc document to receive the style; nothing happens if {@code null}
 * @param style   style to copy; nothing happens if {@code null}
 */
private static void copyStyle(XWPFDocument srcDoc, XWPFDocument destDoc,
        XWPFStyle style) {
    if (destDoc == null || style == null) {
        return;
    }
    // A freshly created document may not have a styles part yet.
    if (destDoc.getStyles() == null) {
        destDoc.createStyles();
    }
    List<XWPFStyle> usedStyleList = srcDoc.getStyles().getUsedStyleList(style);
    for (XWPFStyle xwpfStyle : usedStyleList) {
        if (!destDoc.getStyles().styleExist(xwpfStyle.getStyleId())) {
            destDoc.getStyles().addStyle(xwpfStyle);
        }
    }
}
/**
 * Copies the page margins and page size from the source document's body
 * section properties to the destination document.
 *
 * <p>The destination's section properties element is created once and reused
 * for both margins and size (calling {@code addNewSectPr()} twice would append
 * two separate section-properties elements). Missing source elements or
 * attributes are skipped instead of being dereferenced or written as
 * {@code null}.
 *
 * @param srcDoc  document to read the layout from
 * @param destDoc document to write the layout to
 */
private static void copyLayout(XWPFDocument srcDoc, XWPFDocument destDoc)
{
    // Nothing to copy when the source body carries no section properties.
    if (srcDoc.getDocument().getBody().getSectPr() == null) {
        return;
    }
    // Ensure exactly one section-properties element exists on the destination.
    if (!destDoc.getDocument().getBody().isSetSectPr()) {
        destDoc.getDocument().getBody().addNewSectPr();
    }

    CTPageMar pgMar = srcDoc.getDocument().getBody().getSectPr().getPgMar();
    if (pgMar != null) {
        CTPageMar destPgMar = destDoc.getDocument().getBody().getSectPr().isSetPgMar()
                ? destDoc.getDocument().getBody().getSectPr().getPgMar()
                : destDoc.getDocument().getBody().getSectPr().addNewPgMar();
        BigInteger bottom = pgMar.getBottom();
        BigInteger footer = pgMar.getFooter();
        BigInteger gutter = pgMar.getGutter();
        BigInteger header = pgMar.getHeader();
        BigInteger left = pgMar.getLeft();
        BigInteger right = pgMar.getRight();
        BigInteger top = pgMar.getTop();
        if (bottom != null) {
            destPgMar.setBottom(bottom);
        }
        if (footer != null) {
            destPgMar.setFooter(footer);
        }
        if (gutter != null) {
            destPgMar.setGutter(gutter);
        }
        if (header != null) {
            destPgMar.setHeader(header);
        }
        if (left != null) {
            destPgMar.setLeft(left);
        }
        if (right != null) {
            destPgMar.setRight(right);
        }
        if (top != null) {
            destPgMar.setTop(top);
        }
    }

    CTPageSz pgSzSrc = srcDoc.getDocument().getBody().getSectPr().getPgSz();
    if (pgSzSrc != null) {
        CTPageSz destPgSz = destDoc.getDocument().getBody().getSectPr().isSetPgSz()
                ? destDoc.getDocument().getBody().getSectPr().getPgSz()
                : destDoc.getDocument().getBody().getSectPr().addNewPgSz();
        BigInteger code = pgSzSrc.getCode();
        BigInteger h = pgSzSrc.getH();
        Enum orient = pgSzSrc.getOrient();
        BigInteger w = pgSzSrc.getW();
        if (code != null) {
            destPgSz.setCode(code);
        }
        if (h != null) {
            destPgSz.setH(h);
        }
        if (orient != null) {
            destPgSz.setOrient(orient);
        }
        if (w != null) {
            destPgSz.setW(w);
        }
    }
}
I hope this will solve the issue.
/**
 * Splits a .docx file into two documents with docx4j, cutting after the first
 * top-level content object whose {@code toString()} equals
 * {@code "CONSTRUCTION DETAILS:"} (case-insensitive).
 */
public class SplitUsingDocx4j {

    /**
     * Loads the source document derived from the entries of the directory
     * configured as {@code INPUT_DIR}, copies its style definitions into two
     * new packages and distributes the content between them. The marker
     * object itself is the last object added to the first document. The
     * results are saved as {@code <name>-1.docx} and {@code <name>-2.docx},
     * using relative paths.
     *
     * @param args unused
     * @throws Docx4JException       if loading or saving a package fails
     * @throws FileNotFoundException declared for callers; kept from the original signature
     */
    public static void main(String[] args) throws Docx4JException,
            FileNotFoundException {
        File dir = new File(PropertyUtils.getProperty("INPUT_DIR"));
        String[] files = dir.list();
        // list() returns null when the path is not a readable directory;
        // without this guard the load below would dereference a null File.
        if (files == null || files.length == 0) {
            System.out.println("The directory is empty");
            return;
        }

        File file = null;
        // NOTE(review): only the LAST entry is processed, because the split
        // runs once after this loop - confirm this is intended.
        for (String aFile : files) {
            System.out.println(aFile);
            file = new File(PropertyUtils.getProperty("INPUT_DIR") + aFile
                    + "/" + aFile + ".docx");
        }

        // Creating new documents
        WordprocessingMLPackage doc1 = WordprocessingMLPackage.createPackage();
        WordprocessingMLPackage doc2 = WordprocessingMLPackage.createPackage();

        // Loading the existing document
        WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(file);
        MainDocumentPart tempDocPart = wordMLPackage.getMainDocumentPart();
        List<Object> obj = tempDocPart.getContent();

        // Copy the styles from the existing document to both new documents
        StyleDefinitionsPart sdp = tempDocPart.getStyleDefinitionsPart();
        Styles tempStyle = sdp.getJaxbElement();
        doc1.getMainDocumentPart().getStyleDefinitionsPart()
                .setJaxbElement(tempStyle);
        doc2.getMainDocumentPart().getStyleDefinitionsPart()
                .setJaxbElement(tempStyle);

        // Everything up to and including the marker goes to doc1, the rest to doc2.
        boolean flag = false;
        for (Object object : obj) {
            if (!flag) {
                if (object.toString().equalsIgnoreCase("CONSTRUCTION DETAILS:")) {
                    flag = true;
                }
                doc1.getMainDocumentPart().addObject(object);
            } else {
                doc2.getMainDocumentPart().addObject(object);
            }
        }

        String fileName = file.getName().replace(".docx", "");
        doc1.save(new File(fileName + "-1.docx"));
        doc2.save(new File(fileName + "-2.docx"));
    }
}