I'm making a PDF 'date checker' in Python which tells me whether every page of a PDF has tomorrow's date at the top (for checking newspapers as part of my job).
So far so good, until I attempted to put it all into a GUI: the buttons display the correct filenames, but every button only opens and checks the last file in the list the buttons were generated from ('files[i]').
Can anybody figure out from my horrible newbie code why this is happening? Please excuse the mess (I'm new) :)
Here is my ugly code :) I think the issue is either where I open the file using 'with open(files[i])' or on the 3rd line from the bottom, where the buttons are created. Any help would be greatly appreciated, thank you.
import os, glob
import fileinput
import tkinter as tk
import dateutil
import datetime
from dateutil.relativedelta import *
from dateutil.easter import *
from dateutil.parser import *
from dateutil.rrule import *
import PyPDF2
from PyPDF2 import PdfReader
from datetime import datetime, timedelta
from tkinter import *
# Folder that is globbed for PDF files below.
# NOTE(review): there is no separator after the drive letter ('C:users/...');
# presumably 'C:/users/...' was intended — confirm.
folder_path = 'C:users/axlra/documents/datechecker'
# NOTE(review): the original indentation was lost; the nesting below is a
# reconstruction — confirm against the author's file.
# Runs once per PDF matched in folder_path, reading a file and printing its
# name and the length of what was read.
for filename in glob.glob(os.path.join(folder_path, '*.pdf')):
    # NOTE(review): the loop variable `filename` is never used. The body reads
    # `files[i]` instead, but `files` and `i` are only assigned further down
    # the script, so this line raises NameError as soon as the glob matches
    # at least one file.
    with open(files[i], 'r') as f:
        text = f.read()
        print (files[i])
        print (len(text))
def checknow():
    """Check the PDF at ``files[i]`` for tomorrow's date and print a result per page.

    Takes no arguments so that it can be passed directly as a button
    ``command``. The file to check is selected through the module-level names
    ``files`` and ``i``, which are looked up each time the function runs.

    NOTE(review): because ``i`` is read at call time, after the button-creation
    loop at the bottom of the script has finished, every button ends up
    checking the last entry of ``files``.

    NOTE(review): the original indentation was lost, so the nesting shown here
    is a reconstruction — confirm against the author's file.
    """
    # Tomorrow's date formatted as DD-MM-YYYY, the same format used for the
    # date parsed from the page below.
    tomorrow = (datetime.now() + timedelta(days=1)).strftime("%d-%m-%Y")
    # NOTE(review): this handle is not what the reader uses (the reader is
    # given the path), and it is only closed by the `x > total` test below.
    file = open(files[i], 'rb')
    reader = PdfReader(files[i])
    total = len(reader.pages)
    # NOTE(review): range(total+1) yields 0..total, i.e. one more iteration
    # than there are pages.
    for x in range(total+1):
        # x never exceeds total inside this range, so close() is never reached.
        if x > total: file.close()
        # NOTE(review): always reads the first page regardless of x;
        # presumably reader.pages[x] was intended — confirm.
        page = reader.pages[0]
        found = False
        # This result is not used afterwards; the text that is actually
        # examined is collected through visitor_body below.
        text = (page.extract_text())
        parts = []
        def visitor_body(text, cm, tm, fontDict, fontSize):
            # Passed to extract_text as visitor_text; keeps only the text
            # fragments whose tm[5] value lies strictly between 1600 and 10000.
            # NOTE(review): tm[5] is presumably the vertical position of the
            # fragment, so this selects a band at the top of the page —
            # confirm against the PyPDF2 documentation.
            y = tm[5]
            if y > 1600 and y < 10000:
                parts.append(text)
        page.extract_text(visitor_text=visitor_body)
        text_body = "".join(parts)
        #print(text_body)
        word = text_body
        # Drops the first 22 characters and the last character.
        # NOTE(review): presumably strips fixed header text around the date —
        # confirm against a sample page.
        word=word[22:-1]
        #print(word)
        # `parse` comes from the `from dateutil.parser import *` star import.
        prodate = parse(word)
        str_date = prodate.strftime("%d-%m-%Y")
        print(str_date)
        print(files[i])
        # Both strings are DD-MM-YYYY, so this containment test only succeeds
        # when the dates are equal.
        if tomorrow in str_date:
            found = True
        if found:
            #print(x)
            print("Tomorrow's date was found on page"+ " "+str(x))
        else:
            #print(x)
            print("Tomorrow's date was NOT found on page"+ " "+str(x))
# NOTE(review): the original indentation was lost; the nesting below is a
# reconstruction — confirm against the author's file.
location = os.getcwd() # current working directory; this is where PDFs are looked for
counter = 0 # meant to count the files found (see the note on `counter = counter`)
files = [] # names of all PDF files found at location
for file in os.listdir(location):
    try:
        if file.endswith(".pdf"):
            print ("pdf file found:\t", file)
            files.append(str(file))
            # Assigns counter to itself, so counter stays 0.
            counter = counter
    except Exception as e:
        # Re-raises unchanged, so the print after it cannot run.
        raise e
        print ("No files found here!")
root = Tk()
btn = [] # list that stores the created buttons
# Populates the list with placeholder entries for troubleshooting; since
# counter is still 0 here, this loop body never runs.
for i in range(counter):
    files.append(str(i))
# Creates one button per entry in files. When this loop ends, the module-level
# `i` is left at the last index, len(files) - 1.
for i in range(len(files)):
    # Each button shows its own file name as its text, but all buttons share
    # the same callback. checknow takes no argument and reads files[i] when it
    # is called, so it sees whatever value `i` has at click time.
    btn.append(Button(root, text=files[i], command=checknow))
    btn[i].pack() # packs the button that was just created
root.mainloop()
Based on the given solutions, this is the working code. The solution was to completely get rid of the 'i' list index and just use file_path:
import os
import tkinter as tk
from tkinter import messagebox
import os, glob
import fileinput
import tkinter as tk
import dateutil
import datetime
from dateutil.relativedelta import *
from dateutil.easter import *
from dateutil.parser import *
from dateutil.rrule import *
import PyPDF2
from PyPDF2 import PdfReader
from datetime import datetime, timedelta
from tkinter import *
import re
# Collect the names of the PDF files in the current working directory.
location = os.getcwd()
files = []
for file in os.listdir(location):
    if file.endswith(".pdf"):
        print("pdf file found:\t", file)
        files.append(str(file))
# Number of PDFs found (the original `counter = counter` never counted anything).
counter = len(files)
if not files:
    # Only report this when the listing really came back empty.
    print("No files found here!")

# The date every page is expected to carry. The checker looks for TOMORROW's
# date, so the offset is +1 day; the original used days=-1, which is yesterday.
# Sampled once so that all format variants describe the same day.
_target_day = datetime.now() + timedelta(days=1)

# The same date rendered in the different layouts that may appear in a page
# header. Spaces are removed because the page text is compared with its spaces
# stripped as well (this also removes the padding that %e adds to single-digit
# days). Each variant is now built from its own format string; the original
# overwrote tomorrow2..tomorrow5 with a copy of `tomorrow`.
tomorrow = _target_day.strftime("%A,%B%e").replace(" ", "")
tomorrow2 = _target_day.strftime("%d.%m.%Y").replace(" ", "")
tomorrow3 = _target_day.strftime("%A%e%B%Y").replace(" ", "")
tomorrow4 = _target_day.strftime("%A,%B%e").replace(" ", "")
tomorrow5 = _target_day.strftime("%A,%e%B").replace(" ", "")
def open_pdf(file_path):
    """Print, for each page of a PDF, how often the expected date appears.

    The text of every page is extracted, its spaces are removed, and it is
    searched for the module-level ``tomorrow`` string. One line is printed per
    page saying whether the date appears twice, only once, or not at all.
    Page numbers in the output are zero-based.

    Args:
        file_path: Path of the PDF file to check.
    """
    # The context manager closes the file once every page has been read; the
    # original only closed it under `if x > x`, which is never true.
    with open(file_path, 'rb') as file:
        reader = PdfReader(file)
        for x, page in enumerate(reader.pages):
            # Spaces are stripped so the comparison matches `tomorrow`, which
            # is built without spaces.
            text = page.extract_text().replace(" ", "")
            first_index = text.find(tomorrow)
            if first_index == -1:
                print(f"Tomorrows date {tomorrow} does not appear on page {x} ---")
                continue
            # Look for a second occurrence that starts after the first one ends.
            second_index = text.find(tomorrow, first_index + len(tomorrow))
            if second_index != -1:
                print(f"Tomorrows date {tomorrow} appears twice on page {x}")
            else:
                print(f"Tomorrows date {tomorrow} appears only once on page {x} -")
def create_buttons(directory):
    """Add one button to the ``root`` window for every PDF in ``directory``.

    Each button is labelled with the file name and, when clicked, calls
    ``open_pdf`` with the full path of that file.

    Args:
        directory: Folder whose ``.pdf`` entries get a button.
    """
    for entry in os.listdir(directory):
        if not entry.endswith(".pdf"):
            continue
        target = os.path.join(directory, entry)
        # The path is bound as a default argument so that each button keeps
        # its own file instead of sharing the loop's last value.
        tk.Button(
            root,
            text=entry,
            command=lambda f=target: open_pdf(f),
        ).pack()


# Build the window, fill it with one button per PDF in the current working
# directory, then hand control to the Tk event loop.
root = tk.Tk()
create_buttons(os.getcwd())
root.mainloop()
The basic answer is that at the end of `for i in range(len(files))` the variable `i` does not go out of scope the way it does in some languages; it keeps the last value it was given. A simple test is the following, which will give you an `i` of 2:
# The loop variable is still bound after the loop has finished and holds the
# last value it was given.
for i in range(3):
    pass
print(i)  # prints 2
So when you call checknow(), the referenced file is the last file in files, since i doesn't change after the loop.
Something I've done in the past is create a class encapsulating each button, so that each instance holds its own references. I did it without subclassing the tkinter class, but you could. A sample to give an idea of what I did is:
class FileButton:
    """A button tied to a single file name.

    Every instance stores its own file name, and the button's callback is the
    instance's bound ``checknow`` method, so each button acts on its own file
    rather than on a shared loop variable.
    """

    def __init__(self, root, file_name):
        """Create the button inside ``root`` and pack it.

        Args:
            root: Parent widget the button is placed in.
            file_name: File name shown on the button and kept for the callback.
        """
        self._root = root
        self._file_name = file_name
        # `tk` is the alias this file imports tkinter under; the bare name
        # `tkinter` is not bound by `import tkinter as tk`.
        self._button = tk.Button(root, text=file_name, command=self.checknow)
        self._button.pack()

    def checknow(self):
        """Callback run when the button is clicked (placeholder body)."""
        # Example of how the stored file name is reached from the callback.
        # It could also be read back with self._button.cget("text"), in which
        # case file_name would not need to be stored at all.
        file_name = self._file_name
# One FileButton per file. Iterating over the names directly means no index
# variable is needed.
for file_name in files:
    btn.append(FileButton(root, file_name))
I haven't tested this particular code, and my previous uses were more for labels and entries, but the principle was the same, and I can confirm that using the callback in this manner worked. Also, if you don't need to reference the buttons anymore, you don't have to append them to the btn list either.