Search code examples
java pdf itext generate pdfa

I am not able to generate a PDF/A


I am trying to generate a PDF/A-1b in Java. I have tried a lot of different things, but none of them has worked for me: every PDF I generate fails PDF/A validation. (I am using VeraPDF to validate.)

My code so far:

/**
 * Copies every page of the given PDF into a new document written through a
 * {@code PdfAWriter} configured for {@code PdfAConformanceLevel.PDF_A_1B}
 * and returns the resulting bytes.
 *
 * <p>Besides importing the pages, the method sets an output intent from the
 * classpath resource {@code /icc/sRGB-IEC61966-2.1.icc}, selects an embedded
 * font on the direct content, and finally hands a hand-built XMP packet to the
 * writer. An {@code XMPException} raised while building that packet is caught
 * and only printed, so the document is still closed and returned in that case.
 *
 * @param pdf bytes of the source PDF
 * @return bytes of the document produced by the writer
 * @throws IOException       declared by the method; propagated from the calls in the body
 * @throws DocumentException declared by the method; propagated from the calls in the body
 */
public byte[] convertirAPdfA(byte[] pdf) throws IOException, DocumentException {
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

        // Read the original PDF document; the new document takes the size of page 1
        PdfReader reader = new PdfReader(pdf);
        Document document = new Document(reader.getPageSizeWithRotation(1));

        // Create a PdfAWriter that writes the new PDF/A into the in-memory stream
        PdfAWriter writer = PdfAWriter.getInstance(document, outputStream, PdfAConformanceLevel.PDF_A_1B);

        // Open the document for writing
        document.open();

        // Ask the writer to create its XMP metadata
        writer.createXmpMetadata();

        // Load the ICC profile from the classpath and register it as output intent
        // NOTE(review): the resource stream is neither null-checked nor closed here.
        ICC_Profile icc = ICC_Profile.getInstance(getClass().getResourceAsStream("/icc/sRGB-IEC61966-2.1.icc"));
        writer.setOutputIntents("Custom", "", "http://www.color.org", "sRGB IEC61966-2.1", icc);

        // Create the font (requested as embedded) and select it on the direct content
        BaseFont font = BaseFont.createFont("/fonts/BookAntiqua/BookAntiqua.ttf", BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
        writer.getDirectContent().setFontAndSize(font, 12);

        // Add the pages of the original PDF to the new PDF/A, one new page per source page
        PdfContentByte cb = writer.getDirectContent();
        for (int i = 1; i <= reader.getNumberOfPages(); i++) {
            document.newPage();
            PdfImportedPage page = writer.getImportedPage(reader, i);
            cb.addTemplate(page, 0, 0);
        }

        // Build the XMP metadata by hand
        XMPMeta xmpMeta = XMPMetaFactory.create();

        try {
            // Add DC (Dublin Core) metadata
            xmpMeta.setProperty(XMPConst.NS_DC, "dc:format", "application/pdf");
            xmpMeta.setProperty(XMPConst.NS_DC, "dc:title", "Title of the Document");
            xmpMeta.setProperty(XMPConst.NS_DC, "dc:creator", "Your Name");
            xmpMeta.setProperty(XMPConst.NS_DC, "dc:description", "Description of the document");

            // Add the PDF/A identification schema (part 1, conformance B)
            XMPMetaFactory.getSchemaRegistry().registerNamespace("http://www.aiim.org/pdfa/ns/id/", "pdfaid");
            xmpMeta.setProperty("http://www.aiim.org/pdfa/ns/id/", "pdfaid:part", "1");
            xmpMeta.setProperty("http://www.aiim.org/pdfa/ns/id/", "pdfaid:conformance", "B");

            // Serialize the XMP metadata to a byte array (compact format)
            ByteArrayOutputStream xmpOutputStream = new ByteArrayOutputStream();
            XMPMetaFactory.serialize(xmpMeta, xmpOutputStream, new SerializeOptions().setUseCompactFormat(true));

            // Set the XMP metadata on the document
            // NOTE(review): this runs after createXmpMetadata() above; presumably it
            // replaces the packet the writer generated with this hand-built one —
            // confirm against the PdfWriter documentation.
            writer.setXmpMetadata(xmpOutputStream.toByteArray());
        } catch (XMPException e) {
            // The failure is only printed; execution continues and the document is still closed below
            e.printStackTrace();
        }

        // Close the document and the reader
        // NOTE(review): neither close is in a finally block, so both are skipped if an earlier call throws.
        document.close();
        reader.close();

        // Return the PDF/A as byte[]
        return outputStream.toByteArray();
    }

I am using a Maven project and have the dependencies:

<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itext-pdfa</artifactId>
    <version>5.5.13.1</version>
</dependency>

<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itextpdf</artifactId>
    <version>5.5.13.1</version>
</dependency>

For the moment, I am testing the code with an original PDF that is already PDF/A-compliant and passes validation but, as I wrote at the beginning, the result is not OK.

The error of the validator:

MainXMPPackage  
Identification_size == 1    
root/document[0]/metadata[0](2 0 obj PDMetadata)/XMPPackage[0]
The document metadata stream doesn't contains PDF/A Identification Schema

Solution

  • After some testing, I have working code that generates PDF/A-1B. It takes the PDF as a byte array and an indicator of the desired conformance level.

    Although it also tries to generate PDF/A-1A, that part doesn't work yet; I'll update the code when I find the right approach for the complete functionality.

    I am also posting the code I came up with to check the conformance level of an existing PDF:

    /**
     * Re-writes an existing PDF through a {@code PdfAWriter} and returns the result.
     *
     * <p>Every page of the input is imported into a new document whose page size is
     * taken from page 1 of the input. The writer is asked to create XMP metadata and
     * is given an output intent built from the classpath resource
     * {@code /icc/sRGB-IEC61966-2.1.icc}.
     *
     * <p>When {@code nivelPdfA} is {@code PDF_A_1A} the writer is also marked as tagged;
     * any other value (including {@code null}) is written as {@code PDF_A_1B}.
     * NOTE(review): the original author reports that the PDF/A-1A output does not yet
     * validate; only the PDF/A-1B path is reported to work.
     *
     * @param documento bytes of the source PDF
     * @param nivelPdfA requested conformance level; anything other than
     *                  {@code PDF_A_1A} falls back to {@code PDF_A_1B}
     * @return bytes of the document produced by the writer
     * @throws IOException       if the source PDF or the font cannot be read, or if the
     *                           ICC profile resource is missing from the classpath
     * @throws DocumentException propagated from the iText document/writer calls
     */
    public byte[] transformAPdfA(byte[] documento, PdfAConformanceLevel nivelPdfA) throws IOException, DocumentException {
        final String iccResource = "/icc/sRGB-IEC61966-2.1.icc";

        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        PdfReader reader = new PdfReader(documento);
        try {
            Document document = new Document(reader.getPageSizeWithRotation(1));

            // Only PDF/A-1A is honoured explicitly; everything else becomes PDF/A-1B.
            final boolean nivel1A = nivelPdfA == PdfAConformanceLevel.PDF_A_1A;
            PdfAConformanceLevel conformanceLevel =
                    nivel1A ? PdfAConformanceLevel.PDF_A_1A : PdfAConformanceLevel.PDF_A_1B;
            PdfAWriter writer = PdfAWriter.getInstance(document, outputStream, conformanceLevel);

            if (nivel1A) {
                writer.setTagged();
            }

            document.open();
            writer.createXmpMetadata();

            // Close the resource stream once the profile has been read, and fail with a
            // clear message instead of a NullPointerException when the resource is absent.
            try (java.io.InputStream iccStream = getClass().getResourceAsStream(iccResource)) {
                if (iccStream == null) {
                    throw new IOException("ICC profile resource not found on classpath: " + iccResource);
                }
                writer.setOutputIntents("Custom", "", "http://www.color.org", "sRGB IEC61966-2.1",
                        ICC_Profile.getInstance(iccStream));
            }

            PdfContentByte cb = writer.getDirectContent();
            // Kept from the original: selects an embedded font on the direct content
            // before any page is imported.
            cb.setFontAndSize(BaseFont.createFont("/fonts/BookAntiqua/BookAntiqua.ttf",
                    BaseFont.IDENTITY_H, BaseFont.EMBEDDED), 12);

            // One new output page per source page, each imported at the origin.
            for (int i = 1; i <= reader.getNumberOfPages(); i++) {
                document.newPage();
                PdfImportedPage page = writer.getImportedPage(reader, i);
                cb.addTemplate(page, 0, 0);
            }

            // Closing the document finishes the output; it must happen before the reader is closed.
            document.close();
        } finally {
            // Release the reader even when one of the steps above throws.
            reader.close();
        }

        return outputStream.toByteArray();
    }
    
    /**
     * Reads the XMP metadata of a PDF and reports the PDF/A-1 conformance level it declares.
     *
     * <p>The level is taken from the {@code pdfaid:part} and {@code pdfaid:conformance}
     * properties in the {@code http://www.aiim.org/pdfa/ns/id/} namespace. Only part
     * {@code "1"} is recognised; the conformance letter is compared case-insensitively.
     * This inspects the declared metadata only; it does not validate the document.
     *
     * @param documento bytes of the PDF to inspect
     * @return {@code PDF_A_1A} or {@code PDF_A_1B} when the metadata declares it;
     *         {@code null} when the document has no metadata or declares anything else
     * @throws AmapCsvPdfServiceException if the PDF cannot be read or its XMP packet
     *                                    cannot be parsed (the cause is preserved)
     */
    public PdfAConformanceLevel obtainPdfaConformanceLevel(byte[] documento) throws AmapCsvPdfServiceException {
        final String pdfaIdNamespace = "http://www.aiim.org/pdfa/ns/id/";

        try {
            byte[] metadataBytes;
            PdfReader reader = new PdfReader(documento);
            try {
                metadataBytes = reader.getMetadata();
            } finally {
                // Close the reader even when reading the metadata fails.
                reader.close();
            }

            if (metadataBytes == null) {
                return null; // No XMP metadata: not a PDF/A, or the level cannot be determined
            }

            XMPMeta xmpMeta = XMPMetaFactory.parseFromBuffer(metadataBytes);
            String pdfaPart = xmpMeta.getPropertyString(pdfaIdNamespace, "pdfaid:part");
            String pdfaConformance = xmpMeta.getPropertyString(pdfaIdNamespace, "pdfaid:conformance");

            // The literal is on the left so an absent (null) property simply does not match.
            if (!"1".equals(pdfaPart)) {
                return null;
            }
            if ("A".equalsIgnoreCase(pdfaConformance)) {
                return PdfAConformanceLevel.PDF_A_1A;   // PDF/A-1A
            }
            if ("B".equalsIgnoreCase(pdfaConformance)) {
                return PdfAConformanceLevel.PDF_A_1B;   // PDF/A-1B
            }
        } catch (XMPException | IOException e) {
            throw new AmapCsvPdfServiceException("Error al obtener la informacion de Conformidad PDF/A", e);
        }

        return null; // Part 1 declared, but with an unrecognised conformance letter
    }