Search code examples
Tags: python, selenium, selenium-webdriver, urllib

Is it possible to have multiple explicit waits when using Selenium with Python?


I'm fairly new to Python and Selenium.

My goal is to automate the process of googling a phrase, clicking the first image present in the image results page, waiting for the larger image to load, and then downloading and saving that larger image to a local directory. (The idea is to save a higher-quality version of the image than those initially present in the search results.)

Here's my code that works to download only the initial "smaller" images. (I've omitted all imports, etc., for brevity):

# Script: for each search term, run a Google Images search and save the first
# thumbnail from the results container to save_folder as "<count>image.jpg".
# (Imports are omitted in this snippet.)

# Filesystem path of the chromedriver executable handed to webdriver.Chrome.
PATH = "/path/to/chromedriver"

save_folder = "../Album-Artwork"
# Candidate pause lengths (in seconds) used between searches.
seconds = [1, 2, 3, 4, 5]

# Create the output directory on first run.
if not os.path.exists(save_folder):
    os.mkdir(save_folder)

driver = webdriver.Chrome(PATH)

search_terms = ["John Coltrane Blue Train Album Cover",
                "The Silver Seas Chateau Revenge! Album Cover"]

# Number of images saved so far; also used as the filename prefix.
count = 0

for term in search_terms:

    driver.get("https://www.google.com/imghp?hl=en&ogbl")

    # "q" is the name of the google search field input
    search_bar = driver.find_element_by_name("q")

    # Type the term and press Enter to submit the search.
    search_bar.send_keys(term)
    search_bar.send_keys(Keys.RETURN)

    try:
        # Wait up to 10 seconds for the element with id "islrg" to be present.
        search_results = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "islrg"))
        )

        # Collect every <img> inside that element (find_elements returns a list)
        images = search_results.find_elements_by_tag_name("img")

        # I just want the first result.
        data_url = images[0].get_attribute('src')

        # Fetch the src (a data URL for thumbnails, per the question text) and
        # write the raw bytes to disk.
        with urllib.request.urlopen(data_url) as response:
            data = response.read()
            with open(f"{save_folder}/{count}image.jpg", mode="wb") as f:
                f.write(data)

        # This will print if the above succeeds
        print("Artwork Saved")

        count += 1
        # Pause for a randomly chosen 1-5 seconds before the next search.
        sleep(random.choice(seconds))

    # NOTE(review): the bare except hides which step failed, and quitting the
    # driver here does not stop the loop -- the next iteration would call
    # driver.get() on an already-closed session.
    except:
        print("Error")
        driver.quit()

driver.quit()

But when I add another "wait" to wait for the larger image to load once it's clicked, as shown in the code I've written here:


# Script: same search loop as the previous snippet, but clicks the first
# thumbnail and tries to download the enlarged image instead.
# (Imports are omitted in this snippet.)

# Filesystem path of the chromedriver executable handed to webdriver.Chrome.
PATH = "/path/to/chromedriver"

save_folder = "../Album-Artwork"
# Candidate pause lengths (in seconds) used between searches.
seconds = [1, 2, 3, 4, 5]

# Create the output directory on first run.
if not os.path.exists(save_folder):
    os.mkdir(save_folder)

driver = webdriver.Chrome(PATH)

search_terms = ["John Coltrane Blue Train Album Cover",
                "The Silver Seas Chateau Revenge! Album Cover"]

# Number of images saved so far; also used as the filename prefix.
count = 0

for term in search_terms:

    driver.get("https://www.google.com/imghp?hl=en&ogbl")
    # "q" is the name attribute of the search input; type the term and submit.
    search_bar = driver.find_element_by_name("q")
    search_bar.send_keys(term)
    search_bar.send_keys(Keys.RETURN)

    try:

        # Wait up to 10 seconds for the element with id "islrg" to be present.
        search_results = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "islrg"))
        )

        images = search_results.find_elements_by_tag_name("img")

######## DIFFERENT CODE FROM PREVIOUS SNIPPET BEGINS HERE ########

        images[0].click()

        # Wait for the larger image to load
        # (until() returns the located element with class "n3VNCb")
        new_search_results = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "n3VNCb"))
        )

        # NOTE(review): this searches *within* the element that was itself
        # located by class "n3VNCb", i.e. for a descendant with the same class,
        # rather than using the element returned by the wait.
        large_image = new_search_results.find_element_by_class_name("n3VNCb")

        source = large_image.get_attribute('src')

        # Download and save the image
        # NOTE(review): in Python 3 urlretrieve lives in urllib.request, not
        # directly on urllib -- confirm which interpreter this targets.
        urllib.urlretrieve(source, f"{save_folder}/{count}image.jpg")

######## DIFFERENT CODE FROM PREVIOUS SNIPPET ENDS HERE ########

        print("Artwork Saved")

        count += 1
        # Pause for a randomly chosen 1-5 seconds before the next search.
        sleep(random.choice(seconds))

    # NOTE(review): any exception above lands here; the driver is quit but the
    # loop keeps going, so the next iteration's driver.get() talks to a closed
    # session (the reported error URL ends in /session/<id>/url).
    except:

        print("Error")
        driver.quit()

driver.quit()

I get this error:

urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='127.0.0.1', port=50518): Max retries exceeded with url: /session/3bb2a509ad09817b8e786b2b1ebcecae/url (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x104b36880>: Failed to establish a new connection: [Errno 61] Connection refused'))

In doing some initial research, the above error seems to be avoided whenever "sleep" or other similar methods are used to "slow" the fast processes of Selenium. I'm using sleep here multiple times, so I'm not sure that's the issue.

It also seems that the "src"s for the "smaller" images are data urls, whereas the "src"s for the larger images are urls. Not sure if that may be related to the issue I'm facing.

I'll continue doing research, but are there any insights here?


Solution

  • To get this code to work, I had to remove the variable that was being assigned from this wait:

            new_search_results = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "n3VNCb"))
            )
    

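    as it seems to have been causing the error: the next line searched *inside* that returned element for another element with the same class, which fails; the bare `except` then calls `driver.quit()`, and the following loop iteration's `driver.get()` raises the urllib3 `MaxRetryError` against the closed session. So, instead, I used the driver itself to find the larger image and then passed its `src` to `urllib.request.urlretrieve` to download the image. See the full code below: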

    import os
    import random
    import time
    import urllib
    import urllib.request

    from selenium import webdriver
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.chrome.service import Service
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.support import expected_conditions as ec
    from selenium.webdriver.support.wait import WebDriverWait
    from webdriver_manager.chrome import ChromeDriverManager
    
    # Download the first full-size Google Images result for each search term.
    #
    # For every entry in ``search_terms`` the script runs an image search,
    # clicks the first thumbnail, waits for the enlarged preview to be present,
    # and saves the preview's ``src`` into ``save_folder`` as "<count>image.jpg".
    save_folder = "/Users/name/Documents/"
    # Candidate pause lengths (in seconds) used between searches.
    seconds = [1, 2, 3, 4, 5]

    # exist_ok replaces the separate exists() check (no check-then-create race);
    # makedirs also creates any missing parent directories.
    os.makedirs(save_folder, exist_ok=True)

    optionsforchrome = Options()
    optionsforchrome.add_argument('--no-sandbox')
    optionsforchrome.add_argument('--start-maximized')
    optionsforchrome.add_argument('--disable-extensions')
    optionsforchrome.add_argument('--disable-dev-shm-usage')
    optionsforchrome.add_argument('--ignore-certificate-errors')
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=optionsforchrome)

    search_terms = [
        "John Coltrane Blue Train Album Cover",
        "The Silver Seas Chateau Revenge! Album Cover",
    ]

    # Number of images saved so far; also used as the filename prefix.
    count = 0

    # The browser is closed exactly once, in ``finally``.  Quitting inside the
    # per-term error handler would leave later iterations calling driver.get()
    # on a dead session, which surfaces as urllib3's MaxRetryError.
    try:
        for term in search_terms:
            driver.get("https://www.google.com/imghp?hl=en&ogbl")
            search_bar = driver.find_element(By.NAME, "q")
            search_bar.send_keys(term)
            search_bar.send_keys(Keys.RETURN)
            try:
                # NOTE(review): this exact class list mirrors Google's markup at
                # the time of writing and may need updating -- confirm.
                search_results = WebDriverWait(driver, 10).until(
                    ec.presence_of_element_located(
                        (By.XPATH, '//a[@class="wXeWr islib nfEiy mM5pbd"]')
                    )
                )
                images = search_results.find_elements(By.TAG_NAME, "img")
                ######## DIFFERENT CODE FROM PREVIOUS SNIPPET BEGINS HERE ########
                images[0].click()
                # Wait for the larger image to load
                WebDriverWait(driver, 10).until(
                    ec.presence_of_element_located((By.CLASS_NAME, "n3VNCb"))
                )
                # Look the preview up page-wide from the driver, not from inside
                # another element.
                large_image = driver.find_element(By.CLASS_NAME, "n3VNCb")
                source = large_image.get_attribute('src')
                if not source:
                    # get_attribute returns None when the attribute is absent;
                    # fail this term with a clear message instead of passing
                    # None to urlretrieve.
                    raise ValueError("large image has no src attribute")
                # Download and save the image
                target = os.path.join(save_folder, f"{count}image.jpg")
                urllib.request.urlretrieve(source, target)
                ######## DIFFERENT CODE FROM PREVIOUS SNIPPET ENDS HERE ########
                print("Artwork Saved")
                count += 1
                # Pause for a randomly chosen 1-5 seconds before the next search.
                time.sleep(random.choice(seconds))
            except (WebDriverException, OSError, IndexError, ValueError) as exc:
                # Selenium failures, download/file errors (URLError is an
                # OSError), an empty image list, or a bad/missing URL: report
                # what went wrong and move on to the next term.
                print(f"Error: {exc!r}")
    finally:
        driver.quit()
    

    Please note that I am using the Service and Options objects along with the webdriver_manager library for my code. You may need to change those to make your code work.