Search code examples
pythonpyinstallereelpdfium

pdfium library is not working when compiling my code in python


I wrote a program in Python that converts a PDF file to JPG images. The frontend uses the "eel" library, and the PDF-to-JPG conversion uses the pypdfium2 (pdfium) library. Everything works when I run it from VS Code. I then built the application with PyInstaller, but the resulting executable fails at startup with an error related to the pdfium library.

func.py:

import pandas as pd
import requests
from urllib.parse import urlencode
from datetime import datetime
import pypdfium2 as pdfium
import win32com.client as win32
from pywintypes import com_error
import psutil
import time,os
import openpyxl as xl 
def ExceltoImage(excelfile):
    """Export the first sheet of an Excel workbook to JPG images, one per PDF page.

    The workbook is opened in Excel through COM, its first sheet is exported
    to ``temp/template.pdf`` under the current working directory, and every
    page of that PDF is rendered with pypdfium2 and saved as
    ``temp/template{NNN}.jpg``.

    Args:
        excelfile: Path of the workbook; passed both to openpyxl and to
            Excel's ``Workbooks.Open``.

    Returns:
        List of the JPG paths that were written, in page order.
    """
    # Kill every running Excel process before starting the COM session.
    # NOTE(review): this presumably also closes workbooks the user has open
    # in Excel — confirm that this is intended.
    for proc in psutil.process_iter():
        if proc.name() == "EXCEL.EXE":
            proc.kill()
    # openpyxl is used here only to read the sheet names.
    wbx = xl.load_workbook(excelfile) 
    print(wbx.sheetnames[0])
    pdffile = os.path.join(os.getcwd(),'temp','template.pdf')
    excel = win32.gencache.EnsureDispatch('Excel.Application')
    wb = excel.Workbooks.Open(excelfile)
    # Fixed pause after opening; presumably gives Excel time to finish
    # loading the workbook (there is no explicit readiness check).
    time.sleep(7)
    # ws = wb.WorkSheets(1).Select()
    # Select the first sheet by name so that ActiveSheet below refers to it;
    # the value assigned to `ws` is not used afterwards.
    ws = wb.Worksheets(wbx.sheetnames[0]).Select()
    # Save the active sheet to the PDF path.
    # NOTE(review): the first argument 0 is presumably xlTypePDF — verify.
    wb.ActiveSheet.ExportAsFixedFormat(0, pdffile)
    # Fixed pauses around closing Excel; presumably let the export finish
    # and the file be released before it is read back.
    time.sleep(7)
    wb.Close()
    excel.Quit()
    time.sleep(2)
    pdf = pdfium.PdfDocument(pdffile)
    imgs = []
    # Render each page with scale factor 4 and save it as a numbered JPG.
    for i in range(len(pdf)):
        img = os.path.join(os.getcwd(),'temp',f'template{i:03d}.jpg')
        page = pdf[i]
        image = page.render(scale=4).to_pil()
        image.save(img)
        imgs.append(img)
    return imgs
def ExceltoImageHeader(pdffile):
    """Render every page of a PDF to a JPG file and return the file paths.

    Pages are rendered with scale factor 4 and written to
    ``temp/header{NNN}.jpg`` under the current working directory, where
    ``NNN`` is the zero-based page index padded to three digits.

    Args:
        pdffile: Path of the PDF document to render.

    Returns:
        List of the JPG paths that were written, in page order.
    """
    document = pdfium.PdfDocument(pdffile)
    saved_paths = []
    for page_index in range(len(document)):
        jpg_path = os.path.join(os.getcwd(), 'temp', f'header{page_index:03d}.jpg')
        bitmap = document[page_index].render(scale=4)
        bitmap.to_pil().save(jpg_path)
        saved_paths.append(jpg_path)
    return saved_paths
def getImgData(file):
    """Read columns A-D of an Excel file and return the usable rows.

    The first data row is skipped. Of the remaining rows, only those whose
    third and fourth cells are not NaN (i.e. their string form is not
    ``'nan'``) are kept.

    Args:
        file: Path (or file-like object) accepted by ``pandas.read_excel``.

    Returns:
        List of rows, each row being a list of the four cell values.
    """
    rows = pd.read_excel(file, usecols="A,B,C,D").values.tolist()
    return [
        row
        for row in rows[1:]
        if str(row[2]) != 'nan' and str(row[3]) != 'nan'
    ]
def getImgList(file):
    """Return the unique non-empty values of column C of an Excel file.

    The first seven data rows are skipped and NaN cells (string form
    ``'nan'``) are dropped. Duplicates are removed while keeping the order
    of first appearance.

    Args:
        file: Path (or file-like object) accepted by ``pandas.read_excel``.

    Returns:
        List of one-element lists ``[value]``, one per unique value.
    """
    rows = pd.read_excel(file, usecols="C").values.tolist()
    newdata = [row for row in rows[7:] if str(row[0]) != 'nan']
    print(type(newdata),newdata)
    # Each row is a list, and lists are unhashable, so passing the rows
    # straight to dict.fromkeys() raises TypeError. Deduplicate on tuple
    # keys instead and convert back to lists for the caller.
    unique_rows = dict.fromkeys(tuple(row) for row in newdata)
    return [list(key) for key in unique_rows]
def getyandex(linkimg):
    """Download the file at ``linkimg`` into the ``temp`` directory.

    The local file name is the basename of the URL and the file is written
    to ``temp/`` under the current working directory.

    Args:
        linkimg: URL of the image to download.

    Returns:
        The local file path if the server answered with status 200 and the
        file exists afterwards, otherwise ``False``.
    """
    name = os.path.basename(linkimg)
    image_name = os.path.join(os.getcwd(),'temp',name)
    print(linkimg)
    headers = requests.utils.default_headers()
    headers.update(
        {
            'User-Agent': 'My User Agent 1.0',
        }
    )
    # Use the response as a context manager so the streamed connection is
    # always released, including on the non-200 path.
    with requests.get(linkimg, stream=True,headers=headers) as r:
        if r.status_code != 200:
            return False
        with open(image_name, 'wb') as f:
            # iter_content() defaults to 1-byte chunks; read in larger
            # blocks to avoid one write call per byte.
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    if os.path.exists(image_name):
        return image_name
    return False

main.py:

    from fpdf import FPDF
import os,json,datetime
from func import getImgData,getImgList,getyandex,ExceltoImage,ExceltoImageHeader
class FPDF(FPDF):
    """FPDF subclass that draws a company header and a page-number footer."""

    def header(self):
        """Draw the logo and the company details read from ``setting.json``.

        The settings file is read from ``template/setting.json`` under the
        current working directory each time the header is drawn.
        """
        settings_path = os.path.join(os.getcwd(),'template','setting.json')
        # Close the settings file as soon as it has been parsed; the header
        # is drawn repeatedly, so an unclosed handle would leak each time.
        with open(settings_path, encoding="UTF-8") as f:
            setting = json.load(f)
        # Place the logo
        self.image('template/logo.jpg', 8, 6, 25)
        self.add_font('sysfont', '', r"c:\WINDOWS\Fonts\timesi.ttf", uni=True)
        self.set_font('sysfont', '', 8)

        # Add the company details, one per line; the empty 150-wide cell
        # shifts each line to the right side of the page.
        for key in ('company', 'inn', 'address', 'tel', 'email'):
            self.cell(150)
            self.cell(0, 4, setting[key], ln=1)

        # Line break
        self.ln(3)

    def footer(self):
        """Draw the centred ``<page>/{nb}`` page counter near the page bottom."""
        self.set_y(-10)
        self.set_font('Arial', 'I', 8)

        # Add the page number; '{nb}' is the total-page-count placeholder
        page = str(self.page_no()) + '/{nb}'
        self.cell(0, 10, page, 0, 0, 'C')
 
def create_pdf(smetfile,imgsfile):
    """Build the result PDF from an estimate workbook and an image workbook.

    The document has three parts:

    1. Title lines from ``template/setting.json`` followed by one image per
       item listed in ``smetfile`` that has a matching row in ``imgsfile``.
    2. A landscape page with the first sheet of ``smetfile`` rendered as
       images.
    3. A portrait page with the pages of ``template/footer.pdf`` rendered
       as images.

    Args:
        smetfile: Path of the estimate workbook.
        imgsfile: Path of the workbook that maps item names to image and
            video links.

    Returns:
        Path of the written file,
        ``output/result_<YYYYmmddHHMMSS>.pdf`` under the current working
        directory.
    """
    settings_path = os.path.join(os.getcwd(),'template','setting.json')
    with open(settings_path, encoding="UTF-8") as f:
        setting = json.load(f)
    pdf = FPDF()
    # Enable the special {nb} placeholder (total number of pages)
    pdf.alias_nb_pages()

    # part 1
    pdf.add_page()
    pdf.add_font('Times', '', r"c:\WINDOWS\Fonts\timesbd.ttf", uni=True)
    pdf.set_font('Times', '', 14)
    pdf.cell(0, 5, txt=setting['header_1'], ln=1,align='C')
    pdf.cell(0, 5, txt=setting['header_2'], ln=1,align='C')
    pdf.cell(0, 3, txt="", ln=1,align='C')
    pdf.cell(0, 5, txt=setting['header_3'], ln=1,align='C')
    pdf.cell(0, 5, txt="", ln=1,align='C')
    listimg = getImgList(smetfile)
    dataimg = getImgData(imgsfile)
    if listimg and dataimg:
        for indx,ls in enumerate(listimg):
            # Start a new page before every even-indexed item from the third on
            if indx>1 and indx%2==0:
                pdf.add_page()
            # None (not False) marks "no match": index 0 is a valid match
            # and 0 == False would wrongly report it as missing.
            getindex = None
            for index,img in enumerate(dataimg):
                if img[0]==ls[0]:
                    # No break: the last matching row wins
                    getindex = index
            if getindex is None:
                print("Not Found")
                continue
            row = dataimg[getindex]
            name = row[0].strip()
            typeimg = row[1].strip()
            imglink = row[2].strip()
            videolink = row[3].strip()
            if typeimg=='Не отображаем' or imglink=='Без изображения':
                continue
            pdf.cell(0, 10, txt=name, ln=1,align='C')
            pdf.cell(30)
            print(imglink)
            # NOTE(review): the link read from the workbook is overwritten
            # with a fixed URL here, so every item gets the same image.
            # This looks like a debugging leftover — confirm and remove.
            imglink = 'https://prokarniz.ru/wp-content/uploads/2017/09/elektricheskiy-karniz-novokitay-2-380x260.jpg'
            img = getyandex(imglink)
            if not img:
                print("Not Found 404")
                continue
            if not os.path.exists(img):
                continue
            pdf.image(img,w=130)
            if videolink!='Без видео':
                pdf.cell(0, 10, txt=videolink, ln=1,align='C',link=videolink)
            pdf.cell(10,ln=1)

    # part 2
    pdf.add_page("L")
    # All rendered sheet images are placed at the same position on this page
    for imgxx in ExceltoImage(smetfile):
        pdf.image(imgxx,10,30,w=280,h=180)

    # part 3
    pdf.add_page('P')
    pdf.add_font('Times', '', r"c:\WINDOWS\Fonts\timesbd.ttf", uni=True)
    pdf.set_font('Times', '', 14)
    pathxlsxheader = os.path.join(os.getcwd(),'template','footer.pdf')
    for imgxx in ExceltoImageHeader(pathxlsxheader):
        pdf.image(imgxx,0,30,w=200,h=260)

    now = datetime.datetime.now()
    string = now.strftime('%Y%m%d%H%M%S')
    pathtopdf = os.path.join(os.getcwd(),'output',f'result_{string}.pdf')
    pdf.output(pathtopdf)
    # Return the path so the caller can report where the file was written
    return pathtopdf
# if __name__ == '__main__':
#     create_pdf('output/header_footer.pdf')

app.py

    import main
import eel,os
import tkinter 
import tkinter.filedialog as filedialog
if __name__ == '__main__':
    @eel.expose
    def run(pathsmet,pathimages):
        """Build the PDF when both input files exist.

        Returns the value returned by ``main.create_pdf`` when both paths
        exist, otherwise the string ``"NO"``.
        """
        print(pathsmet,pathimages)
        if os.path.exists(pathsmet) and os.path.exists(pathimages):
            return main.create_pdf(pathsmet,pathimages)
        return "NO"

    @eel.expose
    def selectFolder(type):
        """Show a file-open dialog and return the chosen path.

        ``type`` must be ``'smet'`` or ``'imgs'``; for any other value the
        dialog is still shown but ``None`` is returned.
        """
        print("Here")
        # A hidden, always-on-top root window is needed to host the dialog
        root = tkinter.Tk()
        try:
            root.attributes("-topmost", True)
            root.withdraw()
            filetypes = (
                ('Excel', '*.xlsx'),
                ('All files', '*.*')
            )
            directory_path = filedialog.askopenfilename(
                title='Excel file',
                initialdir='/',
                filetypes=filetypes)
        finally:
            # Destroy the root so repeated calls do not accumulate
            # hidden Tk windows.
            root.destroy()

        if type in ('smet', 'imgs'):
            print(directory_path)
            return str(directory_path)
        return None

    chrome = os.path.join(os.getcwd(),'template','chrome-win','chrome.exe')
    front = os.path.join(os.getcwd(),'template','front')
    eel.init(front)

    # Use the Chrome executable located under the template directory
    eel.browsers.set_path("chrome", chrome)

    eel.start('index.html', mode="chrome", size=(760, 760))

py -m eel app.py web


Traceback (most recent call last): File "app.py", line 1, in File "PyInstaller\loader\pyimode2_importers.py", line 419, in exec_module File "main.py", line 3, in File "PyInstaller\loader\pyimode2_importers.py", line 419, in exec_module File "func.py", line 5, in File "PyInstaller\loader\pyimode2_importers.py", line 419, in exec_module File "pypdfium2_init.py", line 4, in File "PyInstaller\loader\pyimode2_importers.py", line 419, in exec_module File "pypdfium2_library_scope.py", line 6, in File "PyInstaller\loader\pyimode2_importers.py", line 419, in exec_module File "pypdfium2\raw.py", line 5, in File "PyInstaller\loader\pyimode2_importers.py", line 419, in exec_module File "pypdfium2_raw__init.py", line 5, in File "PyInstaller\loader\pyimode2_importers.py", line 419, in exec_module File "pypdfium2_raw\bindings.py", line 53, in File "pypdfium2_raw\bindings.py", line 44, in _register_library File "pypdfium2_raw\bindings.py", line 37, in _find_library ImportError: Could not find library 'pdfium' (dirs=['.'], search_sys=False) [8092] Failed to execute script 'app' due to unhandled exception!

Everything works in VS Code. When I build the EXE with PyInstaller and run it, I get an error saying that the "pdfium" library was not found.


Solution

  • The pypdfium2 modules still look for some files present in site-packages even after the solution is packaged to an EXE. And these files ('pdfium.dll' and two different 'version.json') are nowhere to be found.

    My workaround was to simply add these files to the correct path using pyinstaller's spec file (or using the command line option --add-data)

    So, for that, you'll first need to have pypdfium2 installed in your machine (preferably on a virtual environment) and have your spec file with the following tuples included in datas:

    # Analysis() call of the PyInstaller spec file. Each `datas` entry is a
    # (source path, destination folder inside the bundle) tuple.
    a = Analysis(
        ['your_project.py'],
        pathex=[],
        binaries=[],
        datas=[
            # The pdfium shared library that the pypdfium2_raw bindings load
            ('venv\\Lib\\site-packages\\pypdfium2_raw\\pdfium.dll', 'pypdfium2_raw'),
            # The two version.json files, one per package
            ('venv\\Lib\\site-packages\\pypdfium2_raw\\version.json', 'pypdfium2_raw'),
            ('venv\\Lib\\site-packages\\pypdfium2\\version.json', 'pypdfium2')
        ],
        ...
    )
    

    Alternatively, you could pass those files as multiple --add-data with the pyinstaller command

    pyinstaller --add-data "venv\Lib\site-packages\pypdfium2_raw\pdfium.dll;pypdfium2_raw" --add-data "venv\Lib\site-packages\pypdfium2_raw\version.json;pypdfium2_raw" --add-data "venv\Lib\site-packages\pypdfium2\version.json;pypdfium2" your_project.py
    

    Here is the reference for the spec file: https://pyinstaller.org/en/v4.0/spec-files.html

    PS.: The above paths are for a virtual environment called venv on Windows, but basically you just need to point to wherever your site-packages directory is located :)