Search code examples
Tags: python, pdf, utf-8, reportlab

UTF-8 support in reportlab (Python)


Problem

I can't create a PDF from UTF-8 encoded text using reportlab. What I get is a document full of black squares.

See the screenshot below:

[Screenshot: the generated PDF, with the text rendered as black squares]

Prerequisites

pip install faker reportlab

Code


import tempfile

from faker import Faker

from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Paragraph

# Minimal reproduction: generate non-Latin text and lay it out as a single
# paragraph in a letter-sized PDF.

# FAKER = Faker()  # Latin based text generator

# "hy-AM" is the Armenian (Armenia) locale, so the generated text is
# non-Latin and exercises the PDF's handling of such characters.
FAKER = Faker(locale="hy-AM")  # UTF-8 text generator
text = FAKER.text(max_nb_chars=1_000)  # at most 1,000 characters of text

# Only the generated path string is kept; the temporary-file object itself
# is not retained.
# NOTE(review): with the default delete=True the file is removed once that
# object is closed/collected, so this merely borrows a unique name - confirm
# that is the intent.
filename = tempfile.NamedTemporaryFile(suffix=".pdf").name

styles = getSampleStyleSheet()
# The "Normal" style is used exactly as the sample stylesheet provides it;
# no font is assigned to it anywhere in this script.
style_paragraph = styles["Normal"]
story = []  # flowables handed to doc.build() below

# Letter-sized document with explicit margins, written to `filename`.
doc = SimpleDocTemplate(
    filename,
    pagesize=letter,
    bottomMargin=.4 * inch,
    topMargin=.6 * inch,
    rightMargin=.8 * inch,
    leftMargin=.8 * inch,
)

# The whole generated text becomes one paragraph, the only story item.
paragraph = Paragraph(text, style_paragraph)
story.append(paragraph)
doc.build(story)

Also tried

I also tried a TTF font (Vera), but it didn't work either:

# ...

from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont  # TTFont is used below

# ...

# Register the four Vera faces (regular, bold, italic, bold-italic) under
# the names "Vera", "VeraBd", "VeraIt" and "VeraBI".
# NOTE(review): nothing visible in this fragment assigns one of these names
# to a paragraph style's fontName, so the paragraph presumably still uses
# the stylesheet's default font - confirm against the elided parts.
pdfmetrics.registerFont(TTFont("Vera", "Vera.ttf"))
pdfmetrics.registerFont(TTFont("VeraBd", "VeraBd.ttf"))
pdfmetrics.registerFont(TTFont("VeraIt", "VeraIt.ttf"))
pdfmetrics.registerFont(TTFont("VeraBI", "VeraBI.ttf"))

# ...

# Letter-sized document with explicit margins, written to `filename`
# (defined in an elided part of the script).
doc = SimpleDocTemplate(
    filename,
    pagesize=letter,
    bottomMargin=.4 * inch,
    topMargin=.6 * inch,
    rightMargin=.8 * inch,
    leftMargin=.8 * inch,
)

# ...

Solution

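  • Here's a full code snippet that works. Registering the TrueType font is only half of the fix: the paragraph style must also be pointed at it through `fontName`, otherwise the paragraph keeps using the stylesheet's default font: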

    import tempfile
    
    from faker import Faker
    from reportlab.lib.pagesizes import letter
    from reportlab.lib.styles import getSampleStyleSheet
    from reportlab.lib.units import inch
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.ttfonts import TTFont
    from reportlab.platypus import SimpleDocTemplate, Paragraph
    
    # "hy-AM" is the Armenian (Armenia) locale, so the generated text is
    # non-Latin and exercises the font selected below.
    FAKER = Faker(locale="hy-AM")  # UTF-8 text generator
    text = FAKER.text(max_nb_chars=1_000)

    # Reserve a unique output path and close the handle deterministically.
    # delete=False keeps the (empty) file in place so the name stays reserved
    # until doc.build() overwrites it; a discarded NamedTemporaryFile object
    # would instead be closed and deleted whenever it is garbage-collected.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        filename = tmp.name

    # Make the TrueType font available under the name "Vera".
    # NOTE(review): the font must contain glyphs for the script being
    # rendered - confirm Vera covers Armenian, otherwise register a font
    # that does.
    pdfmetrics.registerFont(TTFont("Vera", "Vera.ttf"))  # <- Important

    styles = getSampleStyleSheet()
    style_paragraph = styles["Normal"]
    # Registering is not enough: the style has to select the font by name.
    style_paragraph.fontName = "Vera"  # <- Important

    # Letter-sized document with explicit margins, written to `filename`.
    doc = SimpleDocTemplate(
        filename,
        pagesize=letter,
        bottomMargin=.4 * inch,
        topMargin=.6 * inch,
        rightMargin=.8 * inch,
        leftMargin=.8 * inch,
    )

    # The whole generated text becomes one paragraph, the only story item.
    paragraph = Paragraph(text, style_paragraph)
    story = [paragraph]
    doc.build(story)
    

    If you need more font variants (such as bold or italic), you can register them as follows:

    # Register the bold, italic and bold-italic faces under their own names.
    pdfmetrics.registerFont(TTFont("VeraBd", "VeraBd.ttf"))  # <- Important
    pdfmetrics.registerFont(TTFont("VeraIt", "VeraIt.ttf"))  # <- Important
    pdfmetrics.registerFont(TTFont("VeraBI", "VeraBI.ttf"))  # <- Important
    # Tie the faces into one family so that <b>/<i> markup inside a Paragraph
    # whose fontName is "Vera" resolves to the matching face.
    pdfmetrics.registerFontFamily(
        "Vera",
        normal="Vera",
        bold="VeraBd",
        italic="VeraIt",
        boldItalic="VeraBI",
    )