Datetime object compare

I am trying to find recent news that was published within the last 2 hours.

Approach

My goal is to take a datetime object of today and compare it to a date of an article which I scraped from the web.

First I compare the datetime by date and then by hour.

Issue

However it seems that even when given a correct date it says it isn't in the correct range.

False fail:

Code

from bs4 import BeautifulSoup
from urllib.request import urlopen
from urllib.request import Request
from datetime import timedelta
from datetime import datetime


def newz(stock_1):
    """Scrape the finviz news table for one ticker and screen its latest headline.

    The ticker is fetched from the finviz quote page, the first ``n`` rows of
    the element with id ``news-table`` are printed and scanned for
    investigation/alert keywords, and the timestamp text of the last row
    processed is then parsed and handed to ``date_comp``.

    Args:
        stock_1: Ticker symbol; it is concatenated onto the finviz quote URL,
            so it is expected to be a string.

    Returns:
        True when the headline contains "INVESTIGATION", or "SHAREHOLDER"
        together with "ALERT", when an AttributeError is raised while reading
        the table, or when ``date_comp`` returns a truthy value. Otherwise the
        function falls off the end and returns None.
    """
    list_now=stock_1
    #list_now=chr(list_now)
    # NOTE(review): new_list is assigned but not used in this function as shown.
    new_list=list_now
    # Wrap the single ticker in a list so the loops below can iterate over it.
    list_now=[list_now]
    print("Stock:{}".format(list_now))
    n = 1 # number of article headlines processed per ticker
    tickers= list_now

    # NOTE(review): new_words is built but not referenced anywhere else in this
    # function as shown; the values look like per-word scores -- confirm.
    new_words = {
            'Insider Sells':-3.4,
            'common':2.0,
            'up':3.4,
            'bankruptcy':-3.4,
            'underperforms':-3.4,
            'overperforms':3.4,
            'outperforms':3.4,
            'overbought':-3.4,
            'oversold':3.4,
            'down':2.0,

            }   

    finviz_url = 'https://finviz.com/quote.ashx?t='
    news_tables = {}

    # Download and parse the quote page of every ticker, keeping only the
    # element whose id is 'news-table' (None when the page has no such element).
    for ticker in tickers:
        url = finviz_url + ticker
        req = Request(url=url,headers={'user-agent': 'my-app/0.0.1'}) 
        resp = urlopen(req)    
        html = BeautifulSoup(resp, features="lxml")
        #print(html)
        news_table = html.find(id='news-table')
        news_tables[ticker] = news_table

    try:
        for ticker in tickers:
            df = news_tables[ticker]
            df_tr = df.findAll('tr')
    
            print ('\n')
            print ('Recent News Headlines for {}: '.format(ticker))
        
            for i, table_row in enumerate(df_tr):
                # Headline text comes from the row's first <a>, the timestamp
                # text from its first <td>.
                a_text = table_row.a.text
                td_text = table_row.td.text
                td_text = td_text.strip()
                print("{0}  {1}".format(a_text,td_text))
                td_text=str(td_text)
                a_text=str(a_text)
                # Look specifically for investigation / shareholder alert /
                # investor notice wording; str.find returns -1 when absent.
                result=a_text.find("INVESTIGATION")
                result=int(result)

                result_1=a_text.find("SHAREHOLDER")
                result_1=int(result_1)

                result_2=a_text.find("ALERT")
                result_2=int(result_2)

                result_3=a_text.find("INVESTOR")
                result_3=int(result_3)

                result_4=a_text.find("NOTICE")
                result_4=int(result_4)



                # Only the first two branches return; the remaining ones just
                # print the message and let processing continue.
                if (result>=0 or result_1>=0) and result_2>=0:
                    print("Fails: Under Investigation")
                    return True
                elif result>=0 :
                    print("Fails: Under Investigation")
                    return True
                elif result_3>=0 and result_2>=0 :
                    print("Fails: Under Investigation")

                elif result_1>=0 and result_4>=0 :
                    print("Fails: Under Investigation")

                # NOTE(review): this condition is already covered by the first
                # branch above, so this branch cannot be reached.
                elif result_1>=0 and result_2>=0 :
                    print("Fails: Under Investigation")

                elif result==-1 and result_1==-1 and result_2==-1:
                    pass                    
                       

                # Stop once n rows have been processed.
                if i == n-1:
                    break
    except AttributeError:
        # Raised e.g. when news_table is None or a row has no <a>/<td>;
        # treated the same as a failing ticker.
        return True
            
    
    # td_text is the timestamp text of the last row processed above.
    # NOTE(review): assumes the form 'Mon-DD-YY HH:MM..' (e.g. 'Jan-24-22 05:48PM')
    # -- confirm; if no row was processed, td_text is unbound here.
    td_text=td_text.split("-",2)
    #print("A",td_text)
    month=td_text[0]
    day=td_text[1]
    # Everything after the second '-' (year plus time) ends up in `year` for now.
    year=td_text[2]

  
   

    months={'Jan': 1, 'Feb':2, 'Mar': 3, 'Apr':4,'May':5, 'Jun': 6, 'Jul':7,'Aug':8,'Sep':9,'Oct':10, 'Nov':11,'Dec':12}
    # pop() is used as a lookup on this freshly built dict; an unknown
    # abbreviation raises KeyError.
    month=months.pop(month)
    #print("MOnth",month)
    #print("Year",year)
    #print("Day",day)
    # Characters 3-4 of the remainder are taken as the hour digits.
    hour=year[3:5]
    #print("Hour data",hour)
    # NOTE(review): a [-1:-2] slice is always empty, so meridian is '' and the
    # AM/PM marker is never taken into account; meridian is not used below.
    meridian=year[-1:-2]
    #print("Meridian",meridian)
    # First two characters are the two-digit year; prefixed with "20" below.
    year=year[0:2]
    year=str(year)
    day=str(day)
    month=str(month)
    year="20"+year


    
    # Date we are stripping from the web, truncated to the hour.
    t=year+'-'+month+'-'+day+'-'+hour
    #print(t)
    # NOTE(review): parsing only '%H' yields a datetime on strptime's default
    # date (1900-01-01), so t_hour is a full datetime far in the past, not an
    # hour number.
    t_hour = datetime.strptime(hour, '%H')
    t = datetime.strptime(t, '%Y-%m-%d-%H')
    #print(t)
   
    
    
        
    
    # Today's date as a datetime object, truncated to the hour via the
    # strftime/strptime round trip.
    today =datetime.today().strftime('%Y-%m-%d-%H')
    today = datetime.strptime(today, '%Y-%m-%d-%H')

    
    hr_margin=timedelta(hours= 2)

    margin = timedelta(days = 1)
        

    

    #print( "Earnings date:{} ".format(t))
    #print("Today:",today)

    # Day window: today +/- 1 day.
    diff_minus=today - margin
    #print(diff_minus)
    diff_plus=today + margin

    # Hour window: today +/- 2 hours (both are datetimes).
    diff_hr_plus= today +hr_margin
    diff_hr_minus= today-hr_margin
    #print(diff_plus)

    #t_hour=t_hour[10::]

    # date_comp returns True when the news is NOT up to date.
    if date_comp(t,diff_plus,diff_minus,diff_hr_plus,diff_hr_minus,t_hour,today):
        return True



# Compare the scraped datetime t against a day window and an hour window,
# printing the outcome. Returns True when the news is NOT up to date (day or
# hour check failed) and False when both checks pass.
# NOTE(review): as posted, the body below is not indented under the def.
def date_comp(t,diff_plus,diff_minus,diff_hr_plus,diff_hr_minus,t_hour,today):      

# NOTE(review): both bounds are diff_plus, so this only holds when
# t == diff_plus; diff_minus is printed below but never compared.
if diff_plus<= t <= diff_plus:
   print("Day is good")
   print("Max allowed date {}".format(diff_plus))
   print("Min allowed date {}".format(diff_minus))
   print('Stripped Datetime {}'.format(t))
   
   #print("Measured time hr",t_hour)
   # Hour check: t_hour must lie between the two hour bounds (inclusive).
   if  diff_hr_minus<=t_hour <=diff_hr_plus:
       print("Hour is good")
       print("Max allowed Hr {}".format(diff_hr_plus))
       print("Min allowed Hr {}".format(diff_hr_minus))
       print('Stripped Datetime {}'.format(t))
       
       print("News is up to date by Hour!! Time :{0} Story Hit: {1}".format(today,t))
       print("\n")
       # False means "not stale": both the day and the hour check passed.
       return False
   else:
       print("News is NOT up to date by Hour!! Time :{0} Story Hit: {1}".format(today,t))
       print("Max allowed Hr {}".format(diff_hr_plus))
       print("Min allowed Hr {}".format(diff_hr_minus))
       print("\n")
       return True

else:
    print("News is NOT up to date by Day!! {} ".format(t))
    print("Max allowed date {}".format(diff_plus))
    print("Min allowed date {}".format(diff_minus))
    print("\n")
    return True

Solution

See how I reduced your given code to a minimal reproducible example with a few steps:

remove the web-scraping (as not essential for the issue)
remove all the comments that do not explain
remove empty lines that do not help to structure
(optionally) add a test (e.g. a function-call) that shows the issue

Fixed issue

Since you reported an issue with the datetime comparison, I looked there and found one inconsistency:

In day comparison you have:

if diff_plus<= t <= diff_plus:

In hour comparison you have:

   if  diff_hr_minus<=t_hour <=diff_hr_plus:

Things to fix:

adjust the lower boundary for the day comparison to diff_minus
can simplify date-parsing using strptime with appropriate format literal:
just compare the hour as 24-hour-range integer, using t.hour

Minimal Reproducible Example

from datetime import timedelta
from datetime import datetime

def compared_date_from_td(td_text):
    """Check whether a scraped timestamp such as 'Jan-24-22 05:48PM' is stale.

    The text is parsed into a datetime, printed together with the current
    time, and passed to ``date_comp`` along with a +/- 1 day window around now
    and a +/- 2 window around the current hour of day.

    Args:
        td_text: Timestamp text of the form 'Mon-DD-YY HH:MMAM' or
            'Mon-DD-YY HH:MMPM' (12-hour clock); surrounding whitespace is
            ignored.

    Returns:
        True when ``date_comp`` reports the timestamp as not up to date,
        otherwise None.

    Raises:
        ValueError: If td_text does not match the expected format.
    """
    # date from web
    # %I (12-hour clock) is required for %p to take effect: with %H the AM/PM
    # marker is matched but ignored, so '05:48PM' would be read as 05:48.
    t = datetime.strptime(td_text.strip(), '%b-%d-%y %I:%M%p')
    print("Earnings date: {} ".format(t))

    # todays date in datetime object
    today = datetime.today()
    print("Today: {}".format(today))

    margin = timedelta(days=1)
    diff_minus = today - margin
    diff_plus = today + margin

    t_hour = t.hour  # get the hour part of datetime t

    # NOTE: plain hour-of-day bounds; they do not wrap around midnight
    # (e.g. at 00:xx the window is -2..2).
    diff_hr_plus = today.hour + 2
    diff_hr_minus = today.hour - 2

    if date_comp(t, diff_plus, diff_minus, diff_hr_plus, diff_hr_minus, t_hour, today):
        return True


def date_comp(t, diff_plus, diff_minus, diff_hr_plus, diff_hr_minus, t_hour, today):
    """Tell whether a news timestamp is outside the allowed day/hour windows.

    The day check (``diff_minus <= t <= diff_plus``) runs first; only when it
    passes is the hour check (``diff_hr_minus <= t_hour <= diff_hr_plus``)
    evaluated. Each outcome is reported on stdout.

    Returns:
        False when both checks pass (news is up to date), True otherwise.
    """
    # Guard clause: reject immediately when the day is out of range.
    day_ok = diff_minus <= t <= diff_plus
    if not day_ok:
        print("News is NOT up to date by Day!! {} ".format(t))
        print("Max allowed date {}".format(diff_plus))
        print("Min allowed date {}".format(diff_minus))
        print("\n")
        return True

    print("Day is good")
    print("Max allowed date {}".format(diff_plus))
    print("Min allowed date {}".format(diff_minus))
    print('Stripped Datetime {}'.format(t))

    # The hour is only looked at once the day has been accepted.
    hour_ok = diff_hr_minus <= t_hour <= diff_hr_plus
    if not hour_ok:
        print("News is NOT up to date by Hour!! Time :{0} Story Hit: {1}".format(today, t))
        print("Max allowed Hr {}".format(diff_hr_plus))
        print("Min allowed Hr {}".format(diff_hr_minus))
        print("\n")
        return True

    print("Hour is good")
    print("Max allowed Hr {}".format(diff_hr_plus))
    print("Min allowed Hr {}".format(diff_hr_minus))
    print('Stripped Datetime {}'.format(t))

    print("News is up to date by Hour!! Time :{0} Story Hit: {1}".format(today, t))
    print("\n")
    return False


# Demo call: run the comparison on a sample scraped timestamp and report the result.
date_text = 'Jan-24-22 05:48PM'
not_uptodate = compared_date_from_td(date_text)
summary = "date: {}, compared as not_uptodate => {}".format(
    date_text,
    not_uptodate,
)
print(summary)

This outputs:

Earnings date: 2022-01-24 05:48:00 
Today: 2022-01-25 00:54:16.122160
Day is good
Max allowed date 2022-01-26 00:54:16.122160
Min allowed date 2022-01-24 00:54:16.122160
Stripped Datetime 2022-01-24 05:48:00
News is NOT up to date by Hour!! Time :2022-01-25 00:54:16.122160 Story Hit: 2022-01-24 05:48:00
Max allowed Hr 2
Min allowed Hr -2


date: Jan-24-22 05:48PM, compared as not_uptodate => True

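Note: Some of the output needs to be adjusted to make sense, such as "Max allowed Hr 2" and so on. Also, the comparison returns True when the date is outside the margins, meaning "NOT up to date" (as in the example above, where the hour differs by more than 2 hours). Be aware, too, that %p only takes effect when the hour is parsed with %I; with %H the text 05:48PM is read as 05:48 rather than 17:48, which is what the output above shows.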

Proper solution

Did I get your goal right? Test whether a datetime is within the last 2 hours.

(1) Given a text, which was scraped from web, you would determine the format and parse it to datetime.

(2) Given datetime, you would calculate the timedelta to now. Then test if this is less than 2 hours:

from datetime import datetime, timedelta

scrapedText = 'Jan-24-22 05:48PM'
# (1) parse datetime; %I (12-hour clock) is needed so that %p (AM/PM) is
# honored -- with %H the marker is ignored and 05:48PM would become 05:48.
newsTime = datetime.strptime(scrapedText, '%b-%d-%y %I:%M%p')

if datetime.now() - newsTime < timedelta(hours=2):  # (2) within last 2 hours
    # news is less than 2 hours ago
    print("News is recent: {}".format(newsTime))