Search code examples
pythondjangopdfpypdfpdfrw

Python Django PDF Flattening of Form Fields


I have a project where I need to fill out pre-made PDFs and the most logical solution that comes to mind to accomplish this is to make the pre-made PDFs into PDF forms so there are tags where input values are supposed to go, then I can look through the form tags in the PDF and line them up with a dictionary of values.

I have accomplished this using PyPDF2. Overall, I took an image of a web form and then opened Acrobat and created a PDF form based on the fields seen in the image, then used PyPDF2 for filling out the PDF form fields, but the caveat is that printing those filled in values seems buggy in some browsers, Firefox being one.

How do I go about converting my PDF form to a standard/flat PDF so I can keep the pre-populated values but lose the editable fields (as I think this is the issue)?

from io import BytesIO

import PyPDF2
from django.http import HttpResponse

from PyPDF2.generic import BooleanObject, NameObject, IndirectObject


def pdf_view(request):
    """Fill the first page of the PDF form template and return it inline.

    Opens ``templates/template.pdf``, copies its first page into a new
    writer, fills the page's form fields from a hard-coded dictionary and
    returns the resulting bytes as an ``application/pdf`` response.

    Args:
        request: The incoming Django request; it is not read by this view.

    Returns:
        An ``HttpResponse`` whose body is the filled PDF, served inline
        under the filename ``completed.pdf``.
    """
    template = 'templates/template.pdf'

    data_dict = {
        'first_name': 'John',
        'last_name': 'Smith',
        'email': '[email protected]',
        'phone': '889-998-9967',
        'company': 'Amazing Inc.',
        'job_title': 'Dev',
        'street': '123 Main Way',
        'city': 'Johannesburg',
        'state': 'New Mexico',
        'zip': 96705,
        'country': 'USA',
        'topic': 'Who cares...'

    }

    output_stream = BytesIO()

    # The context manager closes the template even if filling or writing
    # raises. write() stays inside the block because the page handed to the
    # writer still belongs to the reader, which is backed by the open file.
    with open(template, "rb") as input_stream:
        pdf_reader = PyPDF2.PdfFileReader(input_stream, strict=False)
        # NOTE(review): only the page is copied into the writer below, so
        # this change to the reader's AcroForm probably never reaches the
        # output -- confirm before relying on it.
        if "/AcroForm" in pdf_reader.trailer["/Root"]:
            pdf_reader.trailer["/Root"]["/AcroForm"].update(
                {NameObject("/NeedAppearances"): BooleanObject(True)})

        pdf_writer = PyPDF2.PdfFileWriter()
        set_need_appearances_writer(pdf_writer)
        # The helper is best-effort (it swallows errors), so set the flag
        # again here whenever the writer does have an AcroForm.
        if "/AcroForm" in pdf_writer._root_object:
            # Acro form is form field, set needs appearances to fix printing issues
            pdf_writer._root_object["/AcroForm"].update(
                {NameObject("/NeedAppearances"): BooleanObject(True)})

        pdf_writer.addPage(pdf_reader.getPage(0))
        pdf_writer.updatePageFormFieldValues(pdf_writer.getPage(0), data_dict)

        pdf_writer.write(output_stream)

    response = HttpResponse(output_stream.getvalue(), content_type='application/pdf')
    response['Content-Disposition'] = 'inline; filename="completed.pdf"'

    return response


def set_need_appearances_writer(writer):
    """Make sure the writer's catalog has an /AcroForm with /NeedAppearances.

    If the catalog has no ``/AcroForm`` entry, a new empty dictionary is
    registered with the writer and referenced from the catalog. The
    ``/NeedAppearances`` flag is then set to true on the AcroForm.

    This is best-effort: any exception is printed and swallowed so the
    caller can still produce a PDF.

    Args:
        writer: The ``PyPDF2.PdfFileWriter`` whose catalog is updated in
            place.

    Returns:
        The same ``writer`` object, whether or not the update succeeded.
    """
    from PyPDF2.generic import DictionaryObject

    try:
        catalog = writer._root_object
        # get the AcroForm tree and add the "/NeedAppearances" attribute
        if "/AcroForm" not in catalog:
            # Register a real, empty dictionary for the form. Building
            # IndirectObject(len(writer._objects), ...) by hand does not
            # create anything: it resolves to the last object the writer
            # already holds, so the flag would land in an unrelated
            # dictionary.
            catalog[NameObject("/AcroForm")] = writer._addObject(DictionaryObject())

        catalog["/AcroForm"][NameObject("/NeedAppearances")] = BooleanObject(True)

    except Exception as e:
        print('set_need_appearances_writer() catch : ', repr(e))

    return writer

Solution

  • The solution was simple: read the docs when in doubt. In this case the Adobe docs note that all I needed to do was set bit position 1 (the ReadOnly bit) in each field's flags entry (/Ff), which makes the field read-only, like so:

    from io import BytesIO
    
    import PyPDF2
    from django.http import HttpResponse
    
    from PyPDF2.generic import BooleanObject, NameObject, IndirectObject, NumberObject
    
    
    def pdf(request):
        """Fill the template's first page, lock the filled fields, return it.

        Opens ``templates/template.pdf``, copies its first page into a new
        writer, fills the page's form fields from a hard-coded dictionary,
        marks every filled field read-only and returns the resulting bytes
        as an ``application/pdf`` response.

        Args:
            request: The incoming Django request; it is not read by this
                view.

        Returns:
            An ``HttpResponse`` whose body is the filled PDF, served inline
            under the filename ``completed.pdf``.
        """
        template = 'templates/template.pdf'

        # Bit position 1 of the field flags (/Ff) is the ReadOnly flag.
        read_only_bit = 1

        data_dict = {
            'first_name': 'John\n',
            'last_name': 'Smith\n',
            'email': '[email protected]\n',
            'phone': '889-998-9967\n',
            'company': 'Amazing Inc.\n',
            'job_title': 'Dev\n',
            'street': '123 Main Way\n',
            'city': 'Johannesburg\n',
            'state': 'New Mexico\n',
            'zip': 96705,
            'country': 'USA\n',
            'topic': 'Who cares...\n'

        }

        output_stream = BytesIO()

        # The context manager closes the template even if filling or
        # writing raises. write() stays inside the block because the page
        # handed to the writer still belongs to the reader, which is backed
        # by the open file.
        with open(template, "rb") as input_stream:
            pdf_reader = PyPDF2.PdfFileReader(input_stream, strict=False)
            # NOTE(review): only the page is copied into the writer below,
            # so this change to the reader's AcroForm probably never reaches
            # the output -- confirm before relying on it.
            if "/AcroForm" in pdf_reader.trailer["/Root"]:
                pdf_reader.trailer["/Root"]["/AcroForm"].update(
                    {NameObject("/NeedAppearances"): BooleanObject(True)})

            pdf_writer = PyPDF2.PdfFileWriter()
            set_need_appearances_writer(pdf_writer)
            # The helper is best-effort (it swallows errors), so set the
            # flag again here whenever the writer does have an AcroForm.
            if "/AcroForm" in pdf_writer._root_object:
                # Acro form is form field, set needs appearances to fix printing issues
                pdf_writer._root_object["/AcroForm"].update(
                    {NameObject("/NeedAppearances"): BooleanObject(True)})

            pdf_writer.addPage(pdf_reader.getPage(0))
            page = pdf_writer.getPage(0)
            pdf_writer.updatePageFormFieldValues(page, data_dict)

            for annot_ref in page['/Annots']:
                writer_annot = annot_ref.getObject()
                field_name = writer_annot.get('/T')
                # Compare with == rather than a dict lookup so that a
                # non-hashable /T value simply fails to match.
                if not any(field_name == key for key in data_dict):
                    continue
                # OR the ReadOnly bit into the existing flags instead of
                # overwriting them, so other flags (e.g. multiline) survive.
                current_flags = int(writer_annot["/Ff"]) if "/Ff" in writer_annot else 0
                writer_annot.update({
                    NameObject("/Ff"): NumberObject(current_flags | read_only_bit)
                })

            pdf_writer.write(output_stream)

        response = HttpResponse(output_stream.getvalue(), content_type='application/pdf')
        response['Content-Disposition'] = 'inline; filename="completed.pdf"'

        return response
    
    
    def set_need_appearances_writer(writer):
        """Make sure the writer's catalog has an /AcroForm with /NeedAppearances.

        If the catalog has no ``/AcroForm`` entry, a new empty dictionary is
        registered with the writer and referenced from the catalog. The
        ``/NeedAppearances`` flag is then set to true on the AcroForm.

        This is best-effort: any exception is printed and swallowed so the
        caller can still produce a PDF.

        Args:
            writer: The ``PyPDF2.PdfFileWriter`` whose catalog is updated in
                place.

        Returns:
            The same ``writer`` object, whether or not the update succeeded.
        """
        from PyPDF2.generic import DictionaryObject

        try:
            catalog = writer._root_object
            # get the AcroForm tree and add the "/NeedAppearances" attribute
            if "/AcroForm" not in catalog:
                # Register a real, empty dictionary for the form. Building
                # IndirectObject(len(writer._objects), ...) by hand does not
                # create anything: it resolves to the last object the writer
                # already holds, so the flag would land in an unrelated
                # dictionary.
                catalog[NameObject("/AcroForm")] = writer._addObject(DictionaryObject())

            catalog["/AcroForm"][NameObject("/NeedAppearances")] = BooleanObject(True)

        except Exception as e:
            print('set_need_appearances_writer() catch : ', repr(e))

        return writer