While converting a PDF file to HTML using the code below...
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.fit.pdfdom.PDFDomTree;
import org.fit.pdfdom.PDFDomTreeConfig;
import org.fit.pdfdom.resource.HtmlResourceHandler;
import org.fit.pdfdom.resource.SaveResourceToDirHandler;
/**
 * Converts PDF documents to HTML files using PDFBox and pdf2dom.
 */
public class PdfToHtmlConverter {

    /**
     * Renders the given PDF as an HTML file named {@code outputFileName + ".html"}
     * inside the directory {@code outputFilePath}.
     *
     * <p>Conversion failures are propagated to the caller instead of being printed
     * and ignored, so a returned path always refers to a file that was written
     * completely.
     *
     * @param file           the PDF file to convert
     * @param outputFilePath directory in which the HTML file is created
     * @param outputFileName name of the HTML file without the {@code .html} extension
     * @return the path of the generated HTML file
     * @throws InvalidPasswordException     if the PDF cannot be opened without a password
     * @throws IOException                  if the PDF cannot be read or the HTML file cannot be written
     * @throws ParserConfigurationException if the DOM parser cannot be created
     */
    public String pdfToHtmlFileWriter(File file, String outputFilePath, String outputFileName)
            throws InvalidPasswordException, IOException, ParserConfigurationException {
        String outputFile = outputFilePath + File.separator + outputFileName + ".html";

        // try-with-resources closes the document on every exit path, including
        // Errors such as ServiceConfigurationError that catch (Exception) never sees.
        try (PDDocument pdf = PDDocument.load(file)) {
            // NOTE(review): the previous version built a PDFDomTreeConfig with
            // SaveResourceToDirHandler font/image handlers but never handed it to the
            // parser, so the default configuration was always in effect. That dead
            // code is removed here; if saving resources to a directory is wanted,
            // the config has to be given to the parser explicitly.
            PDFDomTree parser = new PDFDomTree();

            // A plain buffered writer is used instead of PrintWriter because
            // PrintWriter suppresses IOExceptions, which would hide a truncated file.
            // The charset is fixed to UTF-8 so the output does not depend on the
            // platform default encoding.
            try (Writer woutput = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(outputFile), StandardCharsets.UTF_8))) {
                parser.writeText(pdf, woutput);
            }
        }
        return outputFile;
    }
}
And the build.gradle file has the following dependency list...
// Compile-scope libraries of the project, in 'group:name:version' notation.
dependencies {
    // Every jar placed in the local lib/ directory
    compile fileTree(dir: 'lib', include: ['*.jar'])

    // Apache PDFBox
    compile 'org.apache.pdfbox:pdfbox:2.0.6'
    compile 'org.apache.pdfbox:pdfbox-tools:2.0.6'

    // Log4j 2
    compile 'org.apache.logging.log4j:log4j:2.11.0'
    compile 'org.apache.logging.log4j:log4j-api:2.6.1'
    compile 'org.apache.logging.log4j:log4j-core:2.6.1'

    // Mail and Bouncy Castle
    compile 'javax.mail:mail:1.4.1'
    compile 'org.bouncycastle:bcmail-jdk15:1.46'
    compile 'org.bouncycastle:bcprov-jdk15on:1.47'

    // Ehcache, Guava, Jedis
    compile 'net.sf.ehcache:ehcache-core:2.4.6'
    compile 'com.google.guava:guava:11.0.2'
    compile 'redis.clients:jedis:2.9.0'

    // Apache POI
    compile 'org.apache.poi:poi-ooxml:3.17'
    compile 'org.apache.poi:poi:3.17'

    // pdf2dom and the levigo JBIG2 ImageIO plugin
    compile 'net.sf.cssbox:pdf2dom:1.7'
    compile 'com.levigo.jbig2:levigo-jbig2-imageio:1.6.5'

    // JSON
    compile 'com.google.code.gson:gson:2.8.2'
    compile 'org.json:json:20180130'
}
Aw Snap! I got the following exception at runtime...
[org.glassfish.jersey.server.ContainerException: java.util.ServiceConfigurationError: com.levigo.jbig2.util.log.LoggerBridge: Provider com.levigo.jbig2.util.log.JDKLoggerBridge not a subtype] with root cause
java.util.ServiceConfigurationError: com.levigo.jbig2.util.log.LoggerBridge: Provider com.levigo.jbig2.util.log.JDKLoggerBridge not a subtype
at java.util.ServiceLoader.fail(Unknown Source)
at java.util.ServiceLoader.access$300(Unknown Source)
at java.util.ServiceLoader$LazyIterator.nextService(Unknown Source)
at java.util.ServiceLoader$LazyIterator.next(Unknown Source)
at java.util.ServiceLoader$1.next(Unknown Source)
at com.levigo.jbig2.util.log.LoggerFactory.getLogger(LoggerFactory.java:42)
at com.levigo.jbig2.util.log.LoggerFactory.getLogger(LoggerFactory.java:48)
at com.levigo.jbig2.JBIG2ImageReader.<clinit>(JBIG2ImageReader.java:45)
at com.levigo.jbig2.JBIG2ImageReaderSpi.createReaderInstance(JBIG2ImageReaderSpi.java:116)
at javax.imageio.spi.ImageReaderSpi.createReaderInstance(Unknown Source)
at javax.imageio.ImageIO$ImageReaderIterator.next(Unknown Source)
at javax.imageio.ImageIO$ImageReaderIterator.next(Unknown Source)
at org.apache.pdfbox.filter.Filter.findImageReader(Filter.java:133)
at org.apache.pdfbox.filter.JBIG2Filter.decode(JBIG2Filter.java:54)
at org.apache.pdfbox.cos.COSInputStream.create(COSInputStream.java:69)
at org.apache.pdfbox.cos.COSStream.createInputStream(COSStream.java:167)
at org.apache.pdfbox.pdmodel.common.PDStream.createInputStream(PDStream.java:235)
at org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject.<init>(PDImageXObject.java:125)
at org.apache.pdfbox.pdmodel.graphics.PDXObject.createXObject(PDXObject.java:70)
at org.apache.pdfbox.pdmodel.PDResources.getXObject(PDResources.java:409)
at org.fit.pdfdom.PDFBoxTree.processFontResources(PDFBoxTree.java:397)
at org.fit.pdfdom.PDFBoxTree.updateFontTable(PDFBoxTree.java:361)
at org.fit.pdfdom.PDFDomTree.updateFontTable(PDFDomTree.java:544)
at org.fit.pdfdom.PDFBoxTree.processPage(PDFBoxTree.java:206)
at org.apache.pdfbox.text.PDFTextStripper.processPages(PDFTextStripper.java:319)
at org.apache.pdfbox.text.PDFTextStripper.writeText(PDFTextStripper.java:266)
at org.fit.pdfdom.PDFDomTree.createDOM(PDFDomTree.java:218)
at org.fit.pdfdom.PDFDomTree.writeText(PDFDomTree.java:194)
at com.pype.html.converter.PdfToHtmlConverter.pdfToHtmlFileWriter(PdfToHtmlConverter.java:91)
at com.pype.drawings.slicing.VerticalSlicer.convertCompleteSinglePagePdftoHtml(VerticalSlicer.java:540)
at com.pype.drawings.slicing.VerticalSlicer.convertCompletePdfPageToHtml(VerticalSlicer.java:104)
at com.pype.pdf.schedules.extractor.ExtractSchedules.generateHtmlFiles(ExtractSchedules.java:344)
at com.pype.pdf.schedules.extractor.ExtractSchedules.getIdentifiedSchedulesUsingElements(ExtractSchedules.java:218)
at com.pype.solr.rest.api.ExtractPDFDrawing.processUploadedPDFFile(ExtractPDFDrawing.java:511)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at org.glassfish.jersey.server.model.internal.ResourceMethodInvocationHandlerFactory$1.invoke(ResourceMethodInvocationHandlerFactory.java:81)
at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher$1.run(AbstractJavaResourceMethodDispatcher.java:144)
at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.invoke(AbstractJavaResourceMethodDispatcher.java:161)
at org.glassfish.jersey.server.model.internal.JavaResourceMethodDispatcherProvider$TypeOutInvoker.doDispatch(JavaResourceMethodDispatcherProvider.java:205)
at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.dispatch(AbstractJavaResourceMethodDispatcher.java:99)
at org.glassfish.jersey.server.model.ResourceMethodInvoker.invoke(ResourceMethodInvoker.java:389)
at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:347)
at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:102)
at org.glassfish.jersey.server.ServerRuntime$2.run(ServerRuntime.java:326)
at org.glassfish.jersey.internal.Errors$1.call(Errors.java:271)
at org.glassfish.jersey.internal.Errors$1.call(Errors.java:267)
at org.glassfish.jersey.internal.Errors.process(Errors.java:315)
at org.glassfish.jersey.internal.Errors.process(Errors.java:297)
at org.glassfish.jersey.internal.Errors.process(Errors.java:267)
at org.glassfish.jersey.process.internal.RequestScope.runInScope(RequestScope.java:317)
at org.glassfish.jersey.server.ServerRuntime.process(ServerRuntime.java:305)
at org.glassfish.jersey.server.ApplicationHandler.handle(ApplicationHandler.java:1154)
at org.glassfish.jersey.servlet.WebComponent.serviceImpl(WebComponent.java:473)
at org.glassfish.jersey.servlet.WebComponent.service(WebComponent.java:427)
at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:388)
at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:341)
at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:228)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:231)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:53)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:193)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:166)
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:199)
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:96)
at org.apache.catalina.authenticator.AuthenticatorBase.invoke(AuthenticatorBase.java:502)
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:140)
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:81)
at org.apache.catalina.valves.AbstractAccessLogValve.invoke(AbstractAccessLogValve.java:651)
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:87)
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:342)
at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:501)
at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:66)
at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:754)
at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1376)
at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:49)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)
at java.lang.Thread.run(Unknown Source)
I searched for more information about this error but found no clues. If anybody has an idea, please suggest how to resolve it.
Thanks
Please update to the latest version of the JBIG2 decoder, which is 3.0.2. The JBIG2 decoder is now part of Apache PDFBox, thanks to levigo solutions GmbH. For Maven, use this:
<!-- JBIG2 ImageIO plugin, now published under the Apache PDFBox group id -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>jbig2-imageio</artifactId>
<version>3.0.2</version>
</dependency>
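Or use the direct download. For Gradle, the equivalent is `compile group: 'org.apache.pdfbox', name: 'jbig2-imageio', version: '3.0.2'`, replacing the existing `com.levigo.jbig2:levigo-jbig2-imageio` entry.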