Pandas dataframe date&time filtering
Hey team! The whole idea of the script was to access the link at specified times ["20:00", "20:30", "22:15", "22:45", "23:30", "00:00", "01:45", "02:30", "03:00", "03:30"] and to filter the column Expected Ship Date by the time the script ran + 1 hour.
Example: Script ran at 04/06/2023 23:30, it will filter the Expected Ship Date by 05/06/2023 00:30
At the moment, it does not seem to work properly, for whatever reason, no matter the time I run it, it returns the current day 21:00.
Any idea what am I missing?
__author__ = "studorie"
__version__ = "1.0.1"
from datetime import datetime, timedelta
from io import StringIO
import pandas as pd
from apscheduler.schedulers.blocking import BlockingScheduler
from function.requests_retryer import requests_retry_session
def roster(fc):
url = (
f"https://link.com/{fc}"
)
with requests_retry_session() as request:
response = request.get(url)
if response.status_code != 200:
print(response.raise_for_status())
df = pd.read_csv(StringIO(response.text), sep=",")
df.drop(['FN SKU', 'Scannable ID', 'Condition', 'Ship Method', 'Ship Option', 'Pick Priority'], axis=1,
inplace=True)
df["Expected Ship Date"] = pd.to_datetime(df["Expected Ship Date"])
# Get today's date
today = datetime.now().date()
# Get the current time
current_time = datetime.now().time()
# Find the index of the next job time
job_index = next(
(i for i, job_time in enumerate(job_times) if datetime.strptime(job_time, "%H:%M").time() > current_time), 0)
# Get the adjusted job time
adjusted_job_time = job_times[job_index]
# Calculate the adjusted expected ship date based on the scheduled job time + 1 hour
if adjusted_job_time == "00:00":
# If the adjusted job time is midnight, add 1 day instead of 1 hour
adjusted_date = today + timedelta(days=1)
else:
adjusted_date = today
adjusted_datetime = datetime.combine(adjusted_date,
datetime.strptime(adjusted_job_time, "%H:%M").time()) + timedelta(hours=1)
adjusted_expected_ship_date = adjusted_datetime.strftime("%d/%m/%Y %H:%M")
# Filter the DataFrame based on the adjusted expected ship date
filter_condition = df["Expected Ship Date"].dt.strftime("%d/%m/%Y %H:%M") == adjusted_expected_ship_date
filtered_df = df[filter_condition]
timestamp = datetime.now().strftime("%d-%m-%Y-%H-%M")
filename = f"Y:/Public/L&D/Reports for ops/Flow risk/Pick_SLA_{timestamp}.csv"
# Save the filtered data to the specified filename
filtered_df.to_csv(filename, index=False)
print(f"Filtered data saved to {filename}")
if __name__ == "__main__":
schedule = BlockingScheduler()
job_times = ["20:00", "20:30", "22:15", "22:45", "23:30", "00:00", "01:45", "02:30", "03:00", "03:30"]
for job_time in job_times:
schedule.add_job(roster, 'cron', timezone="Europe/London", hour=int(job_time.split(':')[0]),
minute=int(job_time.split(':')[1]), args=["EMA2"])
schedule.start()
You can use Triggers
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.triggers.combining import OrTrigger
from apscheduler.triggers.cron import CronTrigger
if __name__ == "__main__":
schedule = BlockingScheduler()
cron1 = CronTrigger(day_of_week="*", hour=22, minute=15, timezone="Europe/London")
cron2 = CronTrigger(day_of_week="*", hour="20,0,3", minute=0, timezone="Europe/London")
cron3 = CronTrigger(day_of_week="*", hour="22,1", minute=45, timezone="Europe/London")
cron4 = CronTrigger(day_of_week="*", hour="20,2,3", minute=30, timezone="Europe/London")
schedule.add_job(
roster,
OrTrigger([cron1, cron2, cron3, cron4]),
misfire_grace_time=1000,
max_instances=20,
coalesce=True
)
schedule.start()