Search code examples
Tags: python, python-3.x, pyqt, pyqt5

Converting multiple HTML files to PDF using PyQt5


I tried following this answer: How to use PyQT5 to convert multiple HTML docs to PDF in one loop

I modified it to convert all HTML files found in a local folder. For example, htmls is the list of HTML files to be converted: [Q:\Ray\test1.html, Q:\Ray\prac2.html]

This is the code. However, when I try to run it, Python just freezes and I have to stop the run.

import os
import glob
from PyQt5 import QtWidgets, QtWebEngineWidgets

class PdfPage(QtWebEngineWidgets.QWebEnginePage):
    """Web engine page intended to print a list of HTML files to PDF in turn.

    NOTE(review): nothing in this class ever calls ``load``, so the class
    itself never starts a page load; ``printToPdf`` is only reached from the
    ``loadFinished`` handler.
    """

    def __init__(self):
        super().__init__()
        self._htmls = []
        self._current_path = ""

        self.setZoomFactor(1)
        # Chain the steps: load finished -> print; print finished -> next file.
        self.loadFinished.connect(self._handleLoadFinished)
        self.pdfPrintingFinished.connect(self._handlePrintingFinished)

    def convert(self, htmls):
        """Store an iterator over ``htmls`` and fetch the first entry."""
        # NOTE(review): zip() over a single iterable yields 1-tuples, so every
        # item taken from this iterator is ``(path,)`` rather than ``path``.
        self._htmls = iter(zip(htmls))
        self._fetchNext()

    def _fetchNext(self):
        """Advance ``_current_path`` to the next entry.

        Returns ``False`` when the iterator is exhausted.
        """
        try:
            self._current_path = next(self._htmls)
        except StopIteration:
            return False
        # NOTE(review): on success the function falls off the end here: the
        # entry is not loaded into the page and the return value is None.

    def _handleLoadFinished(self, ok):
        """Slot for ``loadFinished``: print the page when the load succeeded."""
        if ok:
            # The output path passed here is the same value as the input entry.
            self.printToPdf(self._current_path)

    def _handlePrintingFinished(self, filePath, success):
        """Slot for ``pdfPrintingFinished``: report the result and move on."""
        print("finished:", filePath, success)
        # NOTE(review): _fetchNext() returns either False or None, both falsy,
        # so this condition holds every time the slot runs.
        if not self._fetchNext():
            QtWidgets.QApplication.quit()


if __name__ == "__main__":

    # Build the pattern "<directory of this script>\*.HTML" and collect the
    # matching files. The separator is a hard-coded backslash.
    current_dir = os.path.dirname(os.path.realpath(__file__))
    folder= current_dir+ '\\*.HTML'
    htmls= glob.glob(folder)

    app = QtWidgets.QApplication([])
    page = PdfPage()
    page.convert(htmls)
    # NOTE(review): QApplication.quit() is only called from the page's
    # pdfPrintingFinished slot; convert() never starts a load, so presumably
    # nothing ends this event loop and the script appears to freeze.
    app.exec_()

    print("finished")

Solution

  • It seems that the OP has not understood the logic of my previous solution which is:

    1. Get the resource, in this case files,
    2. Load it on the page,
    3. When the load is finished then print the content of the page,
    4. When the printing is finished then execute step 1 with the next resource.

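    The code in the question never performs step 2: it never loads the file into the page, so the load-finished handler does not run and nothing is printed. In addition, it is recommended that the path of the PDF differ from the path of the HTML file.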

    import os
    import glob
    from PyQt5.QtCore import QUrl
    from PyQt5 import QtWidgets, QtWebEngineWidgets
    
    
    class PdfPage(QtWebEngineWidgets.QWebEnginePage):
        """Page that converts a sequence of local HTML files to PDF, one by one.

        The work is driven by signals: a file is loaded, ``loadFinished``
        triggers ``printToPdf``, and ``pdfPrintingFinished`` triggers loading
        the next file. When no files remain the application is asked to quit.
        """

        def __init__(self):
            super().__init__()
            # Always an iterator, so _fetchNext() can call next() on it even
            # before convert() has been called.
            self._htmls = iter(())
            self._current_path = ""

            self.setZoomFactor(1)
            self.loadFinished.connect(self._handleLoadFinished)
            self.pdfPrintingFinished.connect(self._handlePrintingFinished)

        def convert(self, htmls):
            """Start converting ``htmls``, an iterable of local HTML file paths.

            Each PDF is written next to its source file as ``<path>.pdf``.
            """
            self._htmls = iter(htmls)
            self._fetchNext()

        def _fetchNext(self):
            """Load the next file into the page.

            Returns ``True`` if a load was started, ``False`` if no files
            remain.
            """
            try:
                self._current_path = next(self._htmls)
            except StopIteration:
                return False
            self.load(QUrl.fromLocalFile(self._current_path))
            return True

        def _advance(self):
            """Move on to the next file, or quit the application if none is left."""
            if not self._fetchNext():
                QtWidgets.QApplication.quit()

        def _handleLoadFinished(self, ok):
            """Slot for ``loadFinished``: print on success, skip the file on failure."""
            if ok:
                # Append ".pdf" so the output never overwrites the HTML source.
                self.printToPdf(self._current_path + ".pdf")
                return
            # A failed load never produces pdfPrintingFinished, so without this
            # branch the chain would stop and the event loop would never exit.
            print("failed to load:", self._current_path)
            self._advance()

        def _handlePrintingFinished(self, filePath, success):
            """Slot for ``pdfPrintingFinished``: report the result and continue."""
            print("finished:", filePath, success)
            self._advance()
    
    
    if __name__ == "__main__":
        # Convert every *.HTML file located in the same directory as this script.
        current_dir = os.path.dirname(os.path.realpath(__file__))
        # os.path.join supplies the platform's separator instead of a
        # hard-coded backslash; on Windows the resulting pattern is unchanged.
        folder = os.path.join(current_dir, "*.HTML")
        htmls = glob.glob(folder)
        print(htmls)
        # Only start the event loop when there is work to do: with no files,
        # nothing would ever ask the application to quit.
        if htmls:
            app = QtWidgets.QApplication([])
            page = PdfPage()
            page.convert(htmls)
            app.exec_()
        print("finished")