I am able to highlight all the occurrences of a single word in .pdf file using this
but unable to highlight multiple keywords in .pdf file. Here's my code
import fitz
import os
keywords = ["remote","setup"]
pdfFile = "\D:\Python_Scripts\Email Analysis\certification-ACDA.pdf"
for keyword in keywords:
pdf = fitz.open(pdfFile)
for page in pdf:
text_instances = page.searchFor(keyword)
if len(text_instances) > 0:
for inst in text_instances:
highlight = page.addHighlightAnnot(inst)
highlight.setColors(stroke=(1, 0, 0))
highlight.update()
pdf.save(f"{os.path.splitext(os.path.basename(pdfFile))[0]}.pdf", garbage=4, deflate=True, clean=True)
I am overwriting the file after completing search for single keyword but getting this error
Traceback (most recent call last): File "D:/Python_Scripts/Email Analysis/PDF.py", line 19, in <module> pdf.save(f"{os.path.splitext(os.path.basename(pdfFile))[0]}.pdf", garbage=4, deflate=True, clean=True) File "D:\Python_Scripts\Email Analysis\venv\lib\site-packages\fitz\fitz.py", line 4209, in save user_pw, RuntimeError: cannot remove file 'certification-ACDA.pdf': Permission denied
Try to iterate the string using for
:
import fitz
pdf_file = fitz.open("filename.pdf")
for page in pdf_file:
mylist=["word1","word2"]
for i in mylist:
text_to_be_highlighted = i
highlight = page.search_for(text_to_be_highlighted)
for inst in highlight:
highlight = page.add_highlight_annot(inst)
highlight.update()
pdf_file.save("output.pdf", garbage=4, deflate=True, clean=True)