Search code examples
java pdf pdfbox

How to replace centered text in a PDF with PDFBox


I use the PDFTextReplacement example. It does the replacement as expected when my text is left-aligned. But if the input PDF contains centered text, the replacement text comes out left-aligned. So I have to recalculate the correct starting point.

For that reason I have two targets or questions:

  • How to determine the alignment?
  • How to calculate the right starting point?

Here is my code:

/**
 * Loads a PDF and replaces text shown by {@code TJ} operators using a lookup map.
 *
 * <p>Each page's content stream is tokenized. For every {@code TJ} operator the string
 * fragments of its operand array are concatenated; if the trimmed result is a key of
 * {@code text}, the array is collapsed to a single string holding the mapped value.
 * Arrays whose text has no entry in the map are left untouched. {@code Tj} operands are
 * only re-encoded as ISO-8859-1; no lookup is applied to them. Afterwards the page's
 * content stream is rewritten from the token list.
 *
 * <p>No text-positioning operators are modified, so replaced text starts where the
 * original text started.
 *
 * @param inputFile path of the PDF to load
 * @param text      map from the text currently shown to its replacement
 * @return the modified document, still open; the caller saves and closes it
 * @throws IOException          if the file cannot be read or a stream cannot be rewritten
 * @throws COSVisitorException  declared for interface compatibility
 */
public PDDocument doIt(String inputFile, Map<String, String> text)
        throws IOException, COSVisitorException {
    PDDocument doc = PDDocument.load(inputFile);
    boolean finished = false;
    try {
        List pages = doc.getDocumentCatalog().getAllPages();
        for (int i = 0; i < pages.size(); i++) {
            PDPage page = (PDPage) pages.get(i);
            PDStream contents = page.getContents();
            if (contents == null) {
                // Nothing to rewrite on a page without a content stream.
                continue;
            }

            PDFStreamParser parser = new PDFStreamParser(contents.getStream());
            parser.parse();
            List tokens = parser.getTokens();

            // Start at 1: an operator at index 0 has no preceding operand to edit.
            for (int j = 1; j < tokens.size(); j++) {
                Object next = tokens.get(j);
                if (!(next instanceof PDFOperator)) {
                    continue;
                }
                String operation = ((PDFOperator) next).getOperation();
                Object operand = tokens.get(j - 1);

                // Tj and TJ are the two operators that display strings in a PDF.
                if ("Tj".equals(operation) && operand instanceof COSString) {
                    // Tj takes a single string operand; write it back as ISO-8859-1.
                    COSString shown = (COSString) operand;
                    String string = shown.getString();
                    shown.reset();
                    shown.append(string.getBytes("ISO-8859-1"));
                } else if ("TJ".equals(operation) && operand instanceof COSArray) {
                    replaceArrayText((COSArray) operand, text);
                }
            }

            // Now that the tokens are updated, replace the page content stream.
            PDStream updatedStream = new PDStream(doc);
            OutputStream out = updatedStream.createOutputStream();
            try {
                ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
                tokenWriter.writeTokens(tokens);
            } finally {
                // Closing flushes any buffered output before the stream is attached.
                out.close();
            }
            page.setContents(updatedStream);
        }
        finished = true;
        return doc;
    } finally {
        if (!finished) {
            // Do not leak the open document when processing failed part-way.
            doc.close();
        }
    }
}

/**
 * Replaces the text of one TJ operand array if the map has an entry for it.
 *
 * <p>The array mixes string fragments with numeric adjustments. On a match, the first
 * string fragment receives the replacement and every other element is removed; without a
 * match, or when the array holds no string at all, the array is not modified.
 */
private static void replaceArrayText(COSArray array, Map<String, String> text)
        throws IOException {
    StringBuilder shown = new StringBuilder();
    int firstString = -1;
    for (int k = 0; k < array.size(); k++) {
        Object element = array.getObject(k);
        if (element instanceof COSString) {
            if (firstString < 0) {
                firstString = k;
            }
            shown.append(((COSString) element).getString());
        }
    }
    if (firstString < 0) {
        return;
    }

    String replacement = text.get(shown.toString().trim());
    if (replacement == null) {
        return;
    }

    COSString target = (COSString) array.getObject(firstString);
    target.reset();
    target.append(replacement.getBytes("ISO-8859-1"));

    // Remove from the end so earlier indices stay valid while elements are deleted.
    for (int k = array.size() - 1; k >= 0; k--) {
        if (k != firstString) {
            array.remove(k);
        }
    }
}

Solution

  • you can use this function:

     /**
      * Replaces the first occurrence of a pattern in every string shown by a {@code Tj}
      * or {@code TJ} operator and saves the result to a new file.
      *
      * <p>Each string operand is handled independently, so text that is split across
      * several fragments of a {@code TJ} array only matches within a single fragment.
      * No text-positioning operators are modified.
      *
      * @param inputFile  path of the PDF to load
      * @param outputFile path the modified PDF is saved to
      * @param strToFind  text to look for; passed to {@link String#replaceFirst}, so it is
      *                   interpreted as a regular expression
      * @param message    replacement; {@code $} and {@code \} have their
      *                   {@link String#replaceFirst} meaning
      * @throws IOException          if reading, rewriting or saving fails
      * @throws COSVisitorException  if the document cannot be saved
      */
     public void doIt( String inputFile, String outputFile, String strToFind, String message)
             throws IOException, COSVisitorException
     {
         PDDocument doc = null;
         try
         {
             doc = PDDocument.load( inputFile );
             List pages = doc.getDocumentCatalog().getAllPages();
             for( int i=0; i<pages.size(); i++ )
             {
                 PDPage page = (PDPage)pages.get( i );
                 PDStream contents = page.getContents();
                 if( contents == null )
                 {
                     // Nothing to rewrite on a page without a content stream.
                     continue;
                 }
                 PDFStreamParser parser = new PDFStreamParser( contents.getStream() );
                 parser.parse();
                 List tokens = parser.getTokens();

                 // Start at 1: an operator at index 0 has no preceding operand to edit.
                 for( int j=1; j<tokens.size(); j++ )
                 {
                     Object next = tokens.get( j );
                     if( !(next instanceof PDFOperator) )
                     {
                         continue;
                     }
                     String operation = ((PDFOperator)next).getOperation();
                     Object operand = tokens.get( j-1 );

                     // Tj and TJ are the two operators that display strings in a PDF.
                     if( "Tj".equals( operation ) && operand instanceof COSString )
                     {
                         // Tj takes a single operand: the string to display.
                         replaceFirstIn( (COSString)operand, strToFind, message );
                     }
                     else if( "TJ".equals( operation ) && operand instanceof COSArray )
                     {
                         // TJ takes an array; only its string elements carry text.
                         COSArray array = (COSArray)operand;
                         for( int k=0; k<array.size(); k++ )
                         {
                             Object arrElement = array.getObject( k );
                             if( arrElement instanceof COSString )
                             {
                                 replaceFirstIn( (COSString)arrElement, strToFind, message );
                             }
                         }
                     }
                 }

                 // Now that the tokens are updated, replace the page content stream.
                 PDStream updatedStream = new PDStream( doc );
                 OutputStream out = updatedStream.createOutputStream();
                 try
                 {
                     ContentStreamWriter tokenWriter = new ContentStreamWriter( out );
                     tokenWriter.writeTokens( tokens );
                 }
                 finally
                 {
                     // Closing flushes any buffered output before the stream is attached.
                     out.close();
                 }
                 page.setContents( updatedStream );
             }
             doc.save( outputFile );
         }
         finally
         {
             if( doc != null )
             {
                 doc.close();
             }
         }
     }

     /**
      * Rewrites one string operand with the first match of {@code strToFind} replaced.
      * The bytes are written as ISO-8859-1 rather than in the platform default charset,
      * so the output does not depend on the machine the code runs on.
      */
     private static void replaceFirstIn( COSString cosString, String strToFind, String message )
             throws IOException
     {
         String replaced = cosString.getString().replaceFirst( strToFind, message );
         cosString.reset();
         cosString.append( replaced.getBytes( "ISO-8859-1" ) );
     }