I query the sql server and then write the result into a separated csv file in a folder every 10 secs. Then I want to delete those files which are older than 3 days and let the script run forever. So What I did is:
def write_csv():
# connect to sql server and query the info
# write the query result to csv files and store it in folder A
def delete_old_csv():
# check the date of the files within a loop
# delete files if they are older than 3 days
while True:
write_csv()
delete_old_csv()
time.sleep(10)
I think it's not a good design that the delete_old_csv()
to be called every 10 secs looping through files date. Since this is more of a IO bound task, is multi threading a good approach to design here? If yes, how to call these functions into threads?
If my question is wrong here please guide me where to ask this?
Thank you for any help.
Solution 1 without threading:
from datetime import datetime, timedelta
import os
import time
def write_csv():
# connect to sql server and query the info
# write the query result to csv files and store it in folder A
file_expiration_date[filename] = datetime.now() + timedelta(days=3)
def delete_old_csv():
# delete files if they are older than 3 days
# Method 1
current_date = datetime.now()
for filename, expiration_date in file_expiration_date.items():
if current_date >= expiration_date:
os.remove(filename)
del file_expiration_date[filename]
# Method 2
current_date = datetime.now()
filename, expiration_date = max(file_expiration_date.items(), key=lambda x: x[1])
if current_date >= expiration_date:
os.remove(filename)
del file_expiration_date[filename]
file_expiration_date = {} # key: filename, value: file expiration date
while True:
write_csv()
delete_old_csv()
time.sleep(10)
Solution 2 with threading:
from datetime import datetime, timedelta
import os
import time
from threading import Thread
def write_csv():
while running:
# connect to sql server and query the info
# write the query result to csv files and store it in folder A
file_expiration_date[filename] = datetime.now() + timedelta(days=3)
time.sleep(10) # this delay has an impact on how often the files are downloaded
def delete_old_csv():
while running:
# delete files if they are older than 3 days
# Method 1
current_date = datetime.now()
for filename, expiration_date in file_expiration_date.items():
if current_date >= expiration_date:
os.remove(filename)
del file_expiration_date[filename]
# Method 2
current_date = datetime.now()
filename, expiration_date = max(file_expiration_date.items(), key=lambda x: x[1])
if current_date >= expiration_date:
os.remove(filename)
del file_expiration_date[filename]
time.sleep(delay) # this delay has an impact on how often the files testet for expiration
file_expiration_date = {} # key: filename, value: file expiration date
running = True
write_thread = Thread(target=write_csv)
delete_thread = Thread(target=delete_old_csv)
write_thread.start()
delete_thread.start()
try:
while True:
pass
except KeyboardInterrupt:
running = False
write_thread.join()
delete_thread.join()
I think this is a good way to delete the files, which are older than 3 days, but this could also cause a MemoryError.