I am trying to upload an HTML string as a PDF file to GCS from my Django application.
import logging
import os

import google
from google.cloud import storage
logger = logging.getLogger(__name__)


class GcpHelper:
    """Small helper that uploads in-memory data to one Google Cloud Storage bucket.

    The data handed to :meth:`put_data_in_bucket` is stored as-is; the
    ``content_type`` argument only labels the object and does not convert it.
    """

    def __init__(self, bucket_name):
        """Create a storage client from ``google_auth.json`` and look up the bucket.

        A failed bucket lookup is logged, not raised. In that case
        ``self.bucket`` stays ``None`` and every later upload fails with
        ``Exception("Write to gcp failed!")``.

        Args:
            bucket_name: Name of the bucket to upload into.
        """
        self.service_account_json_path = 'google_auth.json'
        # Define the attribute up front so a failed lookup leaves a well-defined
        # state instead of a missing attribute.
        self.bucket = None
        storage_client = storage.Client.from_service_account_json(self.service_account_json_path)
        try:
            self.__bucket_name = bucket_name
            self.bucket = storage_client.get_bucket(bucket_name)
        except Exception as err:
            logger.error("Error {} while connecting to bucket {}!".format(str(err), bucket_name))

    def put_data_in_bucket(self, file_name, data, content_type="application/pdf"):
        """Upload ``data`` to the bucket under the object name ``file_name``.

        Args:
            file_name: Object name (path) inside the bucket.
            data: Content to upload. It is sent unchanged, so for a PDF object
                this must already be PDF content, not HTML.
            content_type: Content type recorded for the uploaded object.

        Returns:
            True when the upload succeeded.

        Raises:
            Exception: "Write to gcp failed!" when the upload fails for any
                reason; the underlying error is attached as ``__cause__``.
        """
        try:
            blob = self.bucket.blob(file_name)
            blob.upload_from_string(data, content_type=content_type)
        except Exception as err:
            logger.error("Unable to upload file {} due to {}".format(file_name, str(err)))
            # Chain the original error so the real cause is visible in tracebacks.
            raise Exception("Write to gcp failed!") from err
        return True
gcp_helper = GcpHelper('bucket_name')
# Read the voucher template with a context manager so the file handle is closed.
with open('voucher_test.html') as voucher_file:
    voucher_html = voucher_file.read()
#some operations on voucher_html string here
# NOTE(review): voucher_html is still HTML at this point. The upload stores the
# data unchanged, so the object is only labelled as a PDF; convert the HTML to
# PDF content before uploading to get a readable file.
gcp_helper.put_data_in_bucket("booking/voucher.pdf", voucher_html)
I was trying to upload the string directly, rather than saving it as a PDF file and then uploading that file. (If nothing else works, I will have to do that.)
But of course this didn't work: the uploaded PDF file was corrupted. I was hoping that blob.upload_from_string
would take care of any formatting/encoding that would be required. But as it seems, it doesn't. ;)
You could write your PDF to a temporary file on disk and then upload that file to Cloud Storage:
import os
from tempfile import NamedTemporaryFile
with NamedTemporaryFile(mode='w+b') as temp:
    # data must be the binary content of the file that came from the request
    temp.write(data)
    # Do not close the file here: a NamedTemporaryFile is deleted as soon as it
    # is closed, so reopening it by name afterwards would fail. Flush the
    # buffered bytes and rewind so the upload reads from the start.
    temp.flush()
    temp.seek(0)
    blob.upload_from_file(temp)
GCS is never going to convert your HTML to a PDF file.
Converting HTML to PDF is always a difficult task, but there is no way to automate this with Cloud Storage.
To work with pdfkit and avoid any formatting problem, I recommend:
In past projects I used this strategy:
I found a similar approach in this GitHub file:
def to_pdf(self):
    """Render this object's HTML template and write the result to a PDF file.

    The template ``<html_template>/<html_template>.html`` is rendered with the
    current site, this object, the provider/client pair, the aggregated line
    items and the hourly-rate currency. pdfkit then writes the rendered HTML
    to a new, uuid-named file under ``settings.INVOICE_PDF_PATH``.

    Returns:
        The path of the generated PDF, which is also stored on ``self.pdf_path``.
    """
    template_name = '{template}/{template}.html'.format(template=self.html_template)
    html_template = get_template(template_name)

    # Build the context after the template lookup so the order of calls is unchanged.
    context = {
        'site': Site.objects.get_current(),
        'invoice': self,
        'users': (
            ('provider', self.provider),
            ('client', self.client),
        ),
        'line_items': self.aggregate_line_items(),
        'currency': self.hourly_rate.hourly_rate_currency
    }
    rendered_html = html_template.render(context)

    # Each call gets its own uuid-based file name under the configured directory.
    output_dir = settings.INVOICE_PDF_PATH
    pdf_file_name = '{}.pdf'.format(uuid.uuid4())
    self.pdf_path = os.path.join(output_dir, pdf_file_name)

    wkhtmltopdf_config = pdfkit.configuration(wkhtmltopdf=settings.HTML_TO_PDF_BINARY_PATH)
    pdfkit.from_string(
        rendered_html,
        self.pdf_path,
        configuration=wkhtmltopdf_config,
        options=self.PDF_OPTIONS,
    )
    return self.pdf_path