How can you correctly set the Author, Title and Subject attributes for a PDF File using Reportlab? I have found the methods in the Reportlab User Guide on page 56, but I am not sure how to implement them correctly.
Below in my PDF cropping and scaling script, I have added the annotations
method, but I don't know where to call them from, or if a whole new Canvas
object is needed. Please excuse the lengthy code, but only after line 113 is the doc
being created, above are mostly auxiliary methods, including the annotations
method on line 30.
# All the necessary parameters are accessible after line 92,
# but can of course be changed manually in the Code
# imports for the crop, rename to avoid conflict with reportlab Image import
from PIL import Image as imgPIL
from PIL import ImageChops, ImageOps, ImageFilter
import os.path, sys
# import for the PDF creation
import glob
from reportlab.lib.pagesizes import A4
from reportlab.lib import utils
from reportlab.platypus import Image, SimpleDocTemplate, Spacer
from reportlab.pdfgen import canvas
# get os path for Cropping
path = (os.path.dirname(os.path.abspath("cropPDF.py")))
dirs = os.listdir(path)
def trim(im, border="white"):
bg = imgPIL.new(im.mode, im.size, border)
diff = ImageChops.difference(im, bg)
bbox = diff.getbbox()
if bbox:
return im.crop(bbox)
def annotations(canvas):
canvas.setAuthor("the ReportLab Team")
canvas.setTitle("ReportLab PDF Generation User Guide")
canvas.setSubject("How to Generate PDF files using the ReportLab modules")
def findMaxWidth():
maxWidth = 0
for item in dirs:
try:
fullpath = os.path.join(path, item)
if os.path.isfile(fullpath):
im = imgPIL.open(fullpath)
maxWidth = max(maxWidth, im.size[0])
except:
pass
return maxWidth
def padImages(docHeight):
maxWidth = findMaxWidth()
for item in dirs:
try:
fullpath = os.path.join(path, item)
if os.path.isfile(fullpath):
im = imgPIL.open(fullpath)
f, e = os.path.splitext(fullpath)
width, height = im.size # get the image dimensions, the height is needed for the blank image
if not docHeight <= height: # to prevent oversized images from bein padded, such that they remain centered
image = imgPIL.new('RGB', (maxWidth, height),
(255, 255, 255)) # create a white image with the max width
image.paste(im, (0, 0)) # paste the original image overtop the blank one, flush on the left side
image.save(f + ".png", "PNG", quality=100)
except:
pass
def crop():
for item in dirs:
try:
fullpath = os.path.join(path, item)
if os.path.isfile(fullpath):
im = imgPIL.open(fullpath)
f, e = os.path.splitext(fullpath)
imCrop = trim(im, "white")
imCrop.save(f + ".png", "PNG", quality=100)
except:
pass
def add_page_number(canvas, doc):
canvas.saveState()
canvas.setFont('Times-Roman', numberFontSize)
page_number_text = "%d" % (doc.page)
canvas.drawCentredString(
pageNumberSpacing * mm,
pageNumberSpacing * mm,
page_number_text
)
canvas.restoreState()
#############################
executeCrop = True
executePad = True
outputName = "output.pdf" #The name of the file that will be created
fileAuthor = "Roman Stadler" #these 3 attributes are visible in the file info menu
fileTitle = ""
fileSubject = ""
margin = 0.5
imageWidthDefault = 550
spacerHeight = 7
scalingIfImageTooTall = 0.95 # larger than 95 can result in an empty page after the image
includePagenumbers = True
numberFontSize = 10
pageNumberSpacing = 5
############################
doc = SimpleDocTemplate(
outputName,
topMargin=margin * mm,
leftMargin=margin * mm,
rightMargin=margin * mm,
bottomMargin=margin * mm,
pagesize=A4
)
if executeCrop:
crop()
if executePad:
padImages(doc.height)
filelist = glob.glob("*.png") # Get a list of files in the current directory
filelist.sort()
story = [] # create the list of images for the PDF
for fn in filelist:
img = utils.ImageReader(fn)
img_width, img_height = img.getSize() # necessary for the aspect ratio
aspect = img_height / float(img_width)
documentHeight = doc.height
imageWidth = imageWidthDefault
imageHeight = imageWidth * aspect
if imageHeight > documentHeight:
imageHeight = documentHeight * scalingIfImageTooTall
imageWidth = imageHeight / aspect
img = Image(
fn,
width=imageWidth,
height=imageHeight
)
story.append(img)
space = Spacer(width=0, height=spacerHeight)
story.append(space)
if includePagenumbers and not len(filelist) == 0: # if pagenumbers are desired, or not
doc.build(
story,
onFirstPage=add_page_number,
onLaterPages=add_page_number,
)
elif not len(filelist) == 0:
doc.build(story)
else: # to prevent an empty PDF that can't be opened
print("no files found")
In the meantime, I have found another way, that does not use reportlab, but instead relies on PyPDF2:
The following import is needed:
# PyPDF2 for the metadata modification
from PyPDF2 import PdfFileReader, PdfFileWriter
Then the metadata can be edited like this:
author = "Roman Stadler"
title = "CropPDF"
subject = "Stackoverflow"
#rest of the script
#attemp the metadate edit
try:
file = open('output.pdf', 'rb+')
reader = PdfFileReader(file)
writer = PdfFileWriter()
writer.appendPagesFromReader(reader)
metadata = reader.getDocumentInfo()
writer.addMetadata(metadata)
writer.addMetadata({
'/Author': author,
'/Title': title,
'/Subject' : subject,
'/Producer' : "CropPDF",
'/Creator' : "CropPDF",
})
writer.write(file)
file.close()
except:
print("Error while editing metadata")