I am trying to crop a pdf and save it to s3 with same name using lambda. I am getting error on the data type being a fitz.fitz.page
import os
import json
import boto3
from urllib.parse import unquote_plus
import fitz, sys
from io import BytesIO
OUTPUT_BUCKET_NAME = os.environ["OUTPUT_BUCKET_NAME"]
OUTPUT_S3_PREFIX = os.environ["OUTPUT_S3_PREFIX"]
SNS_TOPIC_ARN = os.environ["SNS_TOPIC_ARN"]
SNS_ROLE_ARN = os.environ["SNS_ROLE_ARN"]
def lambda_handler(event, context):
textract = boto3.client("textract")
if event:
file_obj = event["Records"][0]
bucketname = str(file_obj["s3"]["bucket"]["name"])
filename = unquote_plus(str(file_obj["s3"]["object"]["key"]))
doc = fitz.open()
s3 = boto3.resource('s3')
obj = s3.Object(bucketname, filename)
fs = obj.get()['Body'].read()
pdf=fitz.open("pdf", stream=BytesIO(fs))
#pdf.close()
rect=fitz.Rect(0.0, 0.0, 595.0, 842.0)
#page = pdf[0]
page1 = doc.new_page(width = rect.width, # new page with ...
height = rect.height)
page1.show_pdf_page(rect, pdf, 0)
print(type(doc))
print(type(page1))
s3.Bucket(bucketname).put_object(Key=filename, Body=page1)
This is happening because the page1 object is defined using fitz.fitz.page
and the type expected by S3 put object is bytes.
In order to solve the issue, you can use the write
function of the new PDF (doc
) and get the output of it which is in bytes format that you could pass to S3 then.
# Save fil first.
new_bytes = doc.write()
s3.Bucket(bucketname).put_object(Key=filename, Body=new_bytes)