Search code examples
python, pdf, reportlab, adobe-reader

Add an acroform to a pdf file with Python


With this Python script I'm able to create a new pdf file called "my_file.pdf" and to add an acroForm editable text box:

from reportlab.pdfgen import canvas
from reportlab.lib.units import cm
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4

# Start a new document; bottomup=0 puts the origin at the top-left corner
# of the page, with y growing downward.
pdf = canvas.Canvas("my_file.pdf", bottomup=0)
pdf.drawString(100, 100, "blablabla")

# Appearance and geometry of the editable text field, gathered in one place.
field_options = dict(
    value="hello world!",
    fillColor=colors.yellow,
    borderColor=colors.black,
    textColor=colors.red,
    borderWidth=2,
    borderStyle='solid',
    width=500,
    height=50,
    x=50,
    y=40,
    tooltip=None,
    name=None,
    fontSize=20,
)

# The canvas exposes its AcroForm helper; the field is added to the current page.
x = pdf.acroForm
x.textfield(**field_options)

# Write my_file.pdf to disk.
pdf.save()

When I open the "my_file.pdf" file with Adobe reader I see this: my_file.pdf

But what I want is to add the text box to an already existing PDF file called "input.pdf" (see next figure), instead of adding it to a new PDF file "my_file.pdf".

input.pdf file

To give you a hint, I'm already able to draw a string (non-editable text) on the existing PDF file called "input.pdf", which gives me the edited file called "out.pdf" (see next figure):

from io import BytesIO 
import pikepdf
from reportlab.pdfgen import canvas
import os
from reportlab.lib.units import cm
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from PyPDF2 import PdfFileReader, PdfFileWriter
from pypdf import PdfReader

# Path of the existing PDF that receives the stamp; despite its name this is
# a file path, and it is passed to pikepdf.open() further down.
text = "input.pdf"

def generate_stamp(msg, xy):
    """Render *msg* on a one-page, in-memory PDF and return the buffer.

    Args:
        msg: Text to draw on the page.
        xy: ``(x, y)`` pair handed to ``drawString``. Because the canvas is
            created with ``bottomup=0``, the origin is the top-left corner of
            the page and y grows downward.

    Returns:
        A ``BytesIO`` rewound to offset 0 that contains the saved PDF.
    """
    x, y = xy
    buf = BytesIO()  # In-memory target so no temporary file is needed.
    # bottomup=0 selects a top-left origin (y increases toward the bottom of
    # the page), the opposite of the PDF default.
    c = canvas.Canvas(buf, bottomup=0)
    c.setFontSize(16)
    # Black at 70% opacity. ReportLab documents CMYK components as 0.0-1.0,
    # so full black is K=1 (the previous value of 100 was out of range).
    c.setFillColorCMYK(0, 0, 0, 1, alpha=0.7)
    c.drawString(x, y, msg)
    c.save()
    buf.seek(0)  # Rewind so callers can read the PDF from the start.
    return buf

# Build the one-page stamp PDF in memory.
stamp = generate_stamp('SOME TEXT STAMP', (300, 100))

# Overlay the stamp on the first page of the existing PDF.
# Both documents are opened as context managers so their underlying handles
# are released as soon as the result has been saved.
with pikepdf.open(text) as pdf_orig, pikepdf.open(stamp) as pdf_text:
    # Convert the stamp page into a Form XObject and copy it into the target file.
    formx_text = pdf_orig.copy_foreign(pdf_text.pages[0].as_form_xobject())
    formx_page = pdf_orig.pages[0]
    # Register the XObject in the page resources; the returned name is what
    # the content stream uses to reference it.
    formx_name = formx_page.add_resource(formx_text, pikepdf.Name.XObject)
    # Identity matrix (1 0 0 1 0 0): paint the stamp unscaled and unshifted,
    # wrapped in q/Q so the graphics state is restored afterwards.
    stamp_text = pdf_orig.make_stream(b'q 1 0 0 1 0 0 cm %s Do Q' % formx_name)
    formx_page.contents_add(stamp_text)
    pdf_orig.save('./out.pdf')

out.pdf file

I would like to have the same thing for the editable text box.


Solution

  • I solved this problem and am now able to add an editable text box to an existing input.pdf file:

    edited pdf file

    This is the complete Python code:

    from pypdf import PdfWriter, PdfReader
    import io
    from reportlab.pdfgen import canvas
    from reportlab.lib import colors
    from reportlab.lib.pagesizes import letter
    
    
    # With this script I'm able to add an editable text box in an existing pdf file.
    # This could be useful to add a text box with a fake word in front of each PII in the pdf file and having the 
    # flexibility to manually remove or modify the text content of the boxes directly from the pdf editor (Adobe reader).
    # The problem is that I'm not able to add multiple different boxes in the same input pdf file.
    # Another problem is that the text box is fixed in the pdf page, this means I can't change the position of the text box
    # from the pdf editor.
    # I want the possibility to eventually delete some text boxes from the pdf editor.
    
    # Generate the editable text box
    def generate_text_box(msg, x_coordinate, y_coordinate, *, width=60, height=15, name=None):
        """Create a one-page, in-memory PDF holding a single editable text field.

        Args:
            msg: Initial text shown in the field.
            x_coordinate: x position passed to ``textfield``.
            y_coordinate: y position passed to ``textfield``.
            width: Field width; defaults to the previously hard-coded 60.
            height: Field height; defaults to the previously hard-coded 15.
            name: Field name forwarded to ``textfield``; ``None`` keeps the
                previous behaviour. NOTE(review): giving each box its own name
                is presumably required when several boxes end up in the same
                document -- confirm against the ReportLab/pypdf behaviour.

        Returns:
            An ``io.BytesIO`` rewound to offset 0 that contains the saved PDF.
        """
        buf = io.BytesIO()
        c = canvas.Canvas(buf, pagesize=letter)
        # Kept from the original: draws an empty string at the origin.
        # Presumably this gives the page some content so it is emitted on
        # save() -- TODO confirm before removing.
        c.drawString(0, 0, "")
        c.acroForm.textfield(
            value=msg,
            fillColor=colors.transparent,
            borderColor=colors.transparent,
            textColor=colors.black,
            borderWidth=0.1,
            borderStyle='solid',
            width=width,
            height=height,
            x=x_coordinate,
            y=y_coordinate,
            tooltip=None,
            name=name,
            fontSize=9,
        )
        c.save()
        buf.seek(0)  # Rewind so the caller can read the PDF from the start.
        return buf
    
    
    
    # Build an editable text box containing "Hello World!" at coordinates (350, 750).
    text_box = generate_text_box("Hello World!", 350, 750)

    # Parse the in-memory PDF that holds the text box (PdfReader is imported from pypdf).
    new_pdf = PdfReader(text_box)

    # Writer that collects the pages of the output document.
    output = PdfWriter()

    # input.pdf stays open until the output has been written, because the
    # reader was given the file object and may still need it while the page is
    # copied and serialised; the `with` block then guarantees it is closed.
    with open("input.pdf", "rb") as input_stream:
        existing_pdf = PdfReader(input_stream)

        # Merge the text-box page onto the first page of the existing PDF.
        # Only this first page is added to the output document.
        page = existing_pdf.pages[0]
        page.merge_page(new_pdf.pages[0])
        output.add_page(page)

        # Write the result; the file is flushed and closed when the block exits.
        with open("output.pdf", "wb") as output_stream:
            output.write(output_stream)