Search code examples
pythonweb-scraping

How to check for new discounts and send to telegram if changes detected?


I would like to scrape new discounts from a website and have a message sent to me on Telegram whenever the website changes.

This is working, but I get too many messages, so I want to change the script to check a specific class on the website.

So on the website I want to check the element <span class="space--ml-1 size--all-l size--fromW3-xl cept-discount">-49%</span>

I want a message if the value is between -65% and -99%. Is this possible? The script that checks for changes is below:

import requests
from bs4 import BeautifulSoup
import difflib
import time
from datetime import datetime
import re
import os
import schedule
import cloudscraper


# target URL
url = "https://nl.pepper.com/groep/prijsfout"
# act like a browser
#headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36         (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
#headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36     (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'}

scraper = cloudscraper.create_scraper()


# Send a message via a telegram bot
def telegram_bot_sendtext(bot_message):
    """Send ``bot_message`` to the configured Telegram chat via the Bot API.

    Args:
        bot_message: Text of the message to send.

    Returns:
        The decoded JSON body of Telegram's response.
    """
    bot_token = '17XXXX32:AAFd5jXXXXXXXXXXXXC5UJgG5pses8'
    bot_chatID = '-XXXXX'
    send_url = 'https://api.telegram.org/bot' + bot_token + '/sendMessage'

    # Hand the fields to requests as query parameters so they get URL-encoded.
    # Concatenating the raw text into the URL lets characters such as '&',
    # '#' or '+' in the message corrupt or truncate the request.
    response = requests.get(
        send_url,
        params={
            'chat_id': bot_chatID,
            'parse_mode': 'Markdown',
            'text': bot_message,
        },
    )

    return response.json()


# Text of the page as seen on the previous poll; empty until the first poll.
PrevVersion = ""
# True until the first page has been memorized as the baseline.
FirstRun = True
while True:

    # download the page
    # NOTE(review): the page fetched here differs from `url`, which is only
    # used in the log and alert text below -- confirm which one is intended.
    response = scraper.get("https://nl.pepper.com/nieuw").content
    # parse the downloaded homepage
    soup = BeautifulSoup(response, 'html.parser')

    # remove all scripts and styles
    for script in soup(["script", "style"]):
        script.extract()
    soup = soup.get_text()

    # compare the page text to the previous version
    if PrevVersion == soup:
        print("No Changes " + str(datetime.now()))
    elif FirstRun:
        # on the first run - just memorize the page
        PrevVersion = soup
        FirstRun = False
        print("Start Monitoring " + url + " " + str(datetime.now()))
    else:
        print("Changes detected at: " + str(datetime.now()))
        # compare versions using difflib; n=0 keeps only the changed lines
        diff = difflib.context_diff(
            PrevVersion.splitlines(), soup.splitlines(), n=0
        )
        # drop blank diff lines and trailing whitespace
        out_text = "\n".join(line.rstrip() for line in diff if line.strip())
        print(out_text)
        # Send the message; the newline keeps the URL from fusing with the
        # first line of the diff.
        telegram_bot_sendtext(
            "Nieuwe prijsfout op Pepper " + url + "\n" + out_text
        )
        PrevVersion = soup
    time.sleep(5)

Maybe there is also a problem with cookies in this script (or cookie handling is not defined).


Solution

  • A simple way to find out whether there are any discounts between -65% and -99% could be the following.

    This function takes your soup, looks for the discount badges in general, and returns True if any discount is in your range, or False if not:

    def get_discounts(soup, low=65, high=99):
        """Return True if any discount badge in ``soup`` is within range.

        Args:
            soup: Parsed page; must provide ``select()`` (e.g. a
                BeautifulSoup object, not the text from ``get_text()``).
            low: Smallest discount percentage that counts (inclusive).
            high: Largest discount percentage that counts (inclusive).

        Returns:
            True as soon as one ``.cept-discount`` element holds a number
            between ``low`` and ``high``; False if none does, including
            when the page has no such elements.
        """
        for badge in soup.select('.cept-discount'):
            # Keep only the digits, so "-49%" becomes "49".
            digits = ''.join(filter(str.isdigit, badge.text))
            # Skip badges that carry no number instead of failing on int('').
            if digits and low <= int(digits) <= high:
                return True
        # Only report False after every badge has been inspected.
        return False
            
    get_discounts(soup)
    

    Note: Call the function before you call soup = soup.get_text(). The order is crucial, because that line replaces the contents of soup with plain text.

    It might be better to store the text in a differently named variable, e.g. souptext. That way you can be sure that soup always contains the BeautifulSoup object, which represents the document as a nested data structure.

    So you will end up with something like this:

    import requests, time, difflib, os, re, schedule, cloudscraper
    from bs4 import BeautifulSoup
    from datetime import datetime
    
    # target URL
    url = "https://nl.pepper.com/groep/prijsfout"
    # act like a browser
    #headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36         (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    #headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36     (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'}
    
    scraper = cloudscraper.create_scraper()
    
    
    # Send a message via a telegram bot
    def telegram_bot_sendtext(bot_message):
        """Send ``bot_message`` to the configured Telegram chat via the Bot API.

        Args:
            bot_message: Text of the message to send.

        Returns:
            The decoded JSON body of Telegram's response.
        """
        bot_token = '17XXXX32:AAFd5jXXXXXXXXXXXXC5UJgG5pses8'
        bot_chatID = '-XXXXX'
        send_url = 'https://api.telegram.org/bot' + bot_token + '/sendMessage'

        # Hand the fields to requests as query parameters so they get
        # URL-encoded. Concatenating the raw text into the URL lets characters
        # such as '&', '#' or '+' in the message corrupt or truncate the request.
        response = requests.get(
            send_url,
            params={
                'chat_id': bot_chatID,
                'parse_mode': 'Markdown',
                'text': bot_message,
            },
        )

        return response.json()
    
    
    def get_discounts(soup, low=65, high=99):
        """Return True if any ``.cept-discount`` badge is within [low, high] percent.

        ``soup`` must still be the BeautifulSoup object (it needs ``select()``).
        Returns False when no badge is in range or the page has none.
        """
        for badge in soup.select('.cept-discount'):
            # Keep only the digits, so "-49%" becomes "49".
            digits = ''.join(filter(str.isdigit, badge.text))
            # Skip badges that carry no number instead of failing on int('').
            if digits and low <= int(digits) <= high:
                return True
        return False


    # Text of the page as seen on the previous poll; empty until the first poll.
    PrevVersion = ""
    # True until the first page has been memorized as the baseline.
    FirstRun = True
    while True:

        # download the page
        # NOTE(review): the page fetched here differs from `url`, which is only
        # used in the log and alert text below -- confirm which one is intended.
        response = scraper.get("https://nl.pepper.com/nieuw").content
        # parse the downloaded homepage
        soup = BeautifulSoup(response, 'html.parser')

        # remove all scripts and styles
        for script in soup(["script", "style"]):
            script.extract()
        # check the badges while soup is still a BeautifulSoup object
        discounts = get_discounts(soup)

        # keep the text in its own variable so soup stays the parsed document
        souptext = soup.get_text()

        if FirstRun:
            # on the first run - always memorize the page, even when no
            # discount is in range, so later changes have a baseline
            PrevVersion = souptext
            FirstRun = False
            print("Start Monitoring " + url + " " + str(datetime.now()))
        elif PrevVersion != souptext:
            print("Changes detected at: " + str(datetime.now()))
            # compare versions using difflib; n=0 keeps only the changed lines
            diff = difflib.context_diff(
                PrevVersion.splitlines(), souptext.splitlines(), n=0
            )
            # drop blank diff lines and trailing whitespace
            out_text = "\n".join(line.rstrip() for line in diff if line.strip())
            print(out_text)
            # Only alert when a discount in the wanted range is on the page.
            # NOTE(review): `discounts` is True whenever any badge on the page
            # is in range, so an unrelated change still alerts while such a
            # deal is listed.
            if discounts:
                # the newline keeps the URL from fusing with the diff text
                telegram_bot_sendtext(
                    "Nieuwe prijsfout op Pepper " + url + "\n" + out_text
                )
            # always advance the baseline so the next diff covers only new changes
            PrevVersion = souptext
        else:
            print("No Changes " + str(datetime.now()))
        time.sleep(5)