I have the next code:
import PyPDF2
import openpyxl
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.worksheet.cell_range import CellRange
#Definir Archivo Excel
file = 'C:\\Users\\Desktop\\PYTHON_PDF\\PRUEBA_PDF.xlsx'
wb = openpyxl.load_workbook(file)#define workook
ws = wb['Hoja1']#define sheet1
#define max values for columns
max_row_for_a = max((a.row for a in ws['A'] if a.value is not None))
max_row_for_b = max((b.row for b in ws['B'] if b.value is not None))
#LOOP
for row in ws.iter_rows(min_row=2, max_col=1, max_row=max_row_for_a):
pdf= row[0].value
print (pdf)
for row2 in ws.iter_rows(min_row=2, max_col=2, max_row=max_row_for_b, min_col=2):
extracto = row2[0].value
pdfselect=open(pdf,"rb")
leer = PyPDF2.PdfFileReader(pdfselect)
pagina = leer.getPage(0)
extracto = pagina.extractText()
print(extracto)
wb.save("PRUEBA_PDF2.xlsx")
wb.close
The idea is read the excel's column A with the pdf's names and write in the column B the pdf extracts, but when i execute the code don't show anything, even dont show error. I try with a minimal case like this and i dont have problems:
#pdfselect=open("ejemplo.pdf","rb")
#leer = PyPDF2.PdfFileReader(pdfselect)
#pagina = leer.getPage(0)
#sheet = wb.active
#ws['B2'] = pagina.extractText()
#wb.save("PRUEBA_PDF2.xlsx")
#wb.close
what i'm doing wrong? Greetings!
I found the problem. The correct code it must like this:
import PyPDF2
import openpyxl
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.worksheet.cell_range import CellRange
#Definir Archivo Excel
file = 'C:\\Users\\lorrego\\Desktop\\PYTHON_PDF\\PRUEBA_PDF.xlsx'
wb = openpyxl.load_workbook(file)#define workook
ws = wb['Hoja1']#define la hoja 1
max_row_for_a = max((a.row for a in ws['A'] if a.value is not None))
for row in ws.iter_rows(min_row=2, max_col=2, max_row=max_row_for_a):
pdf= row[0].value
pdfselected=open(pdf,"rb")
leer = PyPDF2.PdfFileReader(pdfselected)
pagina = leer.getPage(0)
ws.cell(row=row[0].row, column=2).value = pagina.extractText()
wb.save("C:\\Users\\Desktop\\PYTHON_PDF\\PRUEBA_PDF.xlsx")
wb.close