Search code examples
python django encode xhtml2pdf bytesio

xhtml2pdf problem converting an HTML file with Polish characters


I am trying to generate invoices by rendering an HTML file, converting it to PDF, and then sending it as an HTTP response. The problem is that the invoices contain Polish characters, which are not displayed when I use UTF-8. I have tried using ISO-8859-2 instead, but then I get an error: ('charmap' codec can't encode characters in position 1159-1163: character maps to <undefined>).

utils.py:

from io import BytesIO

from django.http import HttpResponse
from django.template.loader import get_template
from xhtml2pdf import pisa


def render_to_pdf(template_src, context_dict=None):
    """Render a Django template to PDF and return it as an HTTP response.

    Args:
        template_src: Template name passed to ``get_template``.
        context_dict: Context passed to the template's ``render``; an empty
            dict is used when omitted.

    Returns:
        An ``HttpResponse`` with content type ``application/pdf`` holding the
        generated document, or a plain ``HttpResponse`` echoing the rendered
        markup when pisa reports an error.
    """
    # Build a fresh dict per call rather than sharing one mutable default
    # object between every invocation.
    if context_dict is None:
        context_dict = {}

    template = get_template(template_src)
    html = template.render(context_dict)
    result = BytesIO()
    # NOTE: the markup is encoded as ISO-8859-2 while pisa is told the source
    # is UTF-8. This mismatch is the attempt described in the question and is
    # kept exactly as posted.
    pdf = pisa.pisaDocument(
        src=BytesIO(html.encode('ISO-8859-2')),
        dest=result,
        encoding='UTF-8',
    )

    if pdf.err:
        return HttpResponse('We had some errors <pre>' + html + '</pre>')
    return HttpResponse(result.getvalue(), content_type='application/pdf')

views.py:

class GeneratePDF(View):
    """Serve the sales invoice as a PDF, shown inline or offered as a download."""

    def get(self, request, pk=None):
        """Return the rendered invoice PDF.

        A non-empty ``download`` query parameter switches the
        Content-Disposition from ``inline`` to ``attachment``.
        """
        # "getting data here" -- the post elides how `data` (the template
        # context) is built; it must be assigned before this point.

        pdf = render_to_pdf("invoice.html", data)

        if pdf:
            response = HttpResponse(pdf, content_type='application/pdf')
            filename = "Sales_Invoice_%s.pdf" % ("name")
            disposition = "attachment" if request.GET.get("download") else "inline"
            # The filename is a double-quoted string: single quotes are not
            # quoted-string delimiters in Content-Disposition and would be
            # treated as part of the file name.
            response['Content-Disposition'] = '%s; filename="%s"' % (
                disposition,
                filename,
            )
            return response
        return Response(status=rest_status.HTTP_404_NOT_FOUND)

invoice.html:

<html>
{# Sales invoice template rendered to PDF by xhtml2pdf. Expects `sales_invoice` and `today` in the context. #}
<head>
<title>Sales Invoice - {{ sales_invoice.id }}</title>
<style>
    @page {
        size: A4 portrait;
        @frame header_frame {
            -pdf-frame-content: header_content;
            left: 50pt; width: 512pt; top: 50pt; height: 40pt;
        }
        @frame content_frame {
            left: 50pt; width: 512pt; top: 90pt; height: 632pt;
            {% comment %} -pdf-frame-border: 1; {% endcomment %}
        }
        @frame footer_frame {
            -pdf-frame-content: footer_content;
            left: 50pt; width: 512pt; top: 772pt; height: 20pt;
        }
    }
</style>
</head>
<body>
    <div id="header_content"><h1>Sales Invoice {{ sales_invoice.id }}</h1></div>
    <p>
        <strong>Date of issue:</strong> Krakow, {{ today | date:"d/m/Y" }}<br />
        <strong>Date of sale:</strong> {{ today | date:"d/m/Y" }}<br />
        <strong>Date of payment:</strong> {{ today | date:"d/m/Y" }}<br />
        <strong>Payment:</strong> cash<br />
    </p>
    {# Parts table: one row per part followed by a total row. #}
    {% if sales_invoice.parts %}
    <table id="cssTable" class="cssTdTh">
      <thead>
        <tr>
          <th>Part</th>
          <th>Code</th>
          <th>Quantity</th>
          <th>Price</th>
        </tr>
      </thead>
      <tbody>
        {% for part in sales_invoice.parts %}
        <tr>
          <td>{{ part.name }}</td>
          <td>{{ part.code }}</td>
          <td>{{ part.amount }}</td>
          <td>{{ part.price_out }}</td>
        </tr>
        {% endfor %}
        <tr>
          <td colspan="3">Total</td>
          <td>{{sales_invoice.total_part_price}}</td>
        </tr>
      </tbody>
    </table>
    {% endif %}

    <br />

    {# Works table: one row per work followed by a total row. #}
    {% if sales_invoice.works %}
    <table id="cssTable" class="cssTdTh">
      <thead>
        <tr>
          <th>Work</th>
          <th>Price</th>
        </tr>
      </thead>
      <tbody>
        {% for work in sales_invoice.works %}
        <tr>
          <td>{{ work.name }}</td>
          <td>{{ work.price }}</td>
        </tr>
        {% endfor %}
        <tr>
          <td>Total</td>
          <td>{{sales_invoice.total_work_price}}</td>
        </tr>
      </tbody>
    </table>
    {% endif %}
    <div id="footer_content">Footer</div>
</body>
</html>
# Debug dump: print the name of every work, then every part, attached to
# `sales_invoice`, framed by separator lines.
separator = "=============================================================="

print(separator)
for work_name in (work.name for work in sales_invoice.works.all()):
    print(work_name)

for part_name in (part.name for part in sales_invoice.parts.all()):
    print(part_name)
print(separator)

output:
==============================================================
Wymiana łącznika elastycznego
Wymiana sprężarki klimatyzacji
Montaż radia
==============================================================
Bęben
Bagnet
Alternator
==============================================================

Result I am getting:
Using ISO-8859-2: result
Using UTF-8: result

Pip freeze:

Django==3.2.7
xhtml2pdf==0.2.5

Solution

  • The problem was not the encoding but the fonts. The solution is to use one of the Asian fonts that xhtml2pdf supports. xhtml2pdf documentation

    For example, a client can have Polish letters in their name, so I use this font here:

    <td style="font-family: STSong-Light">{{client.full_name}}</td>
    

    utils.py:

    from io import BytesIO
    
    from django.http import HttpResponse
    from django.template.loader import get_template
    from xhtml2pdf import pisa
    
    
    def render_to_pdf(template_src, context_dict=None):
        """Render a Django template to PDF and return it as an HTTP response.

        Args:
            template_src: Template name passed to ``get_template``.
            context_dict: Context passed to the template's ``render``; an
                empty dict is used when omitted.

        Returns:
            An ``HttpResponse`` with content type ``application/pdf`` holding
            the generated document, or a plain ``HttpResponse`` echoing the
            rendered markup when pisa reports an error.
        """
        # Build a fresh dict per call rather than sharing one mutable default
        # object between every invocation.
        if context_dict is None:
            context_dict = {}

        template = get_template(template_src)
        html = template.render(context_dict)
        result = BytesIO()
        # The bytes handed to pisa are UTF-8, matching the declared encoding.
        pdf = pisa.pisaDocument(
            src=BytesIO(html.encode('UTF-8')),
            dest=result,
            encoding='UTF-8',
        )

        if pdf.err:
            return HttpResponse('We had some errors <pre>' + html + '</pre>')
        return HttpResponse(result.getvalue(), content_type='application/pdf')