Search code examples
pythonseleniumweb-scrapingxpathwebdriverwait

Can Selenium click on different links?


I want to scrape data from this website (ignore the perfumes that it loads when you scroll down).

For each perfume I want to get its size. To see the size I need to click on the perfume, which leads me to another page. Assuming I can get the size of a perfume once I'm at its URL, how can I write a program that gives me the URL of every perfume's page on the website?

This is the code that finds the perfume's size when I have the right URL:

import gspread
from oauth2client.service_account import ServiceAccountCredentials
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

# Catalogue listing URL; it is not used by the single-page size lookup below.
urlM = 'https://www.myperfume.co.il/155567-%D7%9B%D7%9C-%D7%94%D7%9E%D7%95%D7%AA%D7%92%D7%99%D7%9D-%D7%9C%D7%92%D7%91' \
       '%D7%A8?order=up_title&page=0'

# Authorise a gspread client from the service-account key file and open the
# "Perfumes" spreadsheet.
scope = ["https://spreadsheets.google.com/feeds", 'https://www.googleapis.com/auth/spreadsheets',
         "https://www.googleapis.com/auth/drive.file", "https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", scope)
client = gspread.authorize(creds)

spreadsheet = client.open("Perfumes")

# Start a headless Chrome session. The flag is passed as an argument because
# the `headless` property setter is deprecated in recent Selenium 4 releases.
options = ChromeOptions()
options.add_argument("--headless")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Placeholder: substitute the URL of a single perfume's page.
perfume_url = "[THE PERFUME'S URL]"
driver.get(perfume_url)

# `find_element(By.XPATH, ...)` replaces `find_element_by_xpath`, which newer
# Selenium 4 releases no longer provide.
info = driver.find_element(By.XPATH, '//*[(@id = "item_current_sub_title")]//span').text

# Only the first line of the sub-title is used. split() also copes with
# single-line text, whereas slicing at find('\n') == -1 would silently drop
# the last character.
first_line = info.split('\n')[0]

# Remove the "גודל" label and keep only letters and digits.
res = ''.join(ch for ch in first_line.replace('גודל', '') if ch.isdigit() or ch.isalpha())
print(res)

Solution

  • To do this you will need the following steps:
    For each product, hover over it to make the "more details" and "add to cart" buttons appear.
    Click the "more details" button.
    On the page that opens, get the product size (and any other details).
    Go back to the main page.
    To do this for many products you have to fetch the list of products again each time you return to the main page; otherwise you will get a stale element exception.
    So, your code can be something like this:

    import time

    from selenium.webdriver.common.action_chains import ActionChains
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    # This snippet expects an already-created `driver` that has loaded the
    # listing page.

    # XPath matching the product blocks on the listing page.
    PRODUCT_XPATH = "//div[contains(@class,'layout_list_item')]"

    actions = ActionChains(driver)
    wait = WebDriverWait(driver, 20)
    wait.until(EC.visibility_of_element_located((By.XPATH, PRODUCT_XPATH)))
    time.sleep(1)
    # Only the count is kept: the elements themselves go stale as soon as we
    # navigate away, so each one is looked up again by index inside the loop.
    product_count = len(driver.find_elements(By.XPATH, PRODUCT_XPATH))
    for i in range(product_count):
        wait.until(EC.visibility_of_element_located((By.XPATH, PRODUCT_XPATH)))
        time.sleep(1)
        product = driver.find_elements(By.XPATH, PRODUCT_XPATH)[i]
        # hover over the product block
        actions.move_to_element(product).perform()
        # click the "more details" button
        product.find_element(By.XPATH, ".//p[contains(@class,'extra_button')]").click()
        # in the details page get the product sub-title containing the product size
        product_size = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div#item_current_sub_title"))).text
        # get back to the main page
        driver.execute_script("window.history.go(-1)")
    

    UPD
    This is exactly what I run:

    import time

    from selenium import webdriver
    from selenium.webdriver.chrome.service import Service
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.action_chains import ActionChains

    urlM = 'https://www.myperfume.co.il/155567-%D7%9B%D7%9C-%D7%94%D7%9E%D7%95%D7%AA%D7%92%D7%99%D7%9D-%D7%9C%D7%92%D7%91' \
           '%D7%A8?order=up_title&page=0'

    # XPath matching the product blocks on the listing page.
    PRODUCT_XPATH = "//div[contains(@class,'layout_list_item')]"

    # The driver path goes through a Service object; the `executable_path`
    # keyword is no longer accepted by current Selenium 4 releases.
    driver = webdriver.Chrome(service=Service('chromedriver.exe'))
    wait = WebDriverWait(driver, 20)
    actions = ActionChains(driver)

    try:
        driver.maximize_window()

        driver.get(urlM)
        wait.until(EC.visibility_of_element_located((By.XPATH, PRODUCT_XPATH)))
        time.sleep(1)
        # Only the count is kept: the elements themselves go stale as soon as
        # we navigate away, so each one is looked up again by index in the loop.
        product_count = len(driver.find_elements(By.XPATH, PRODUCT_XPATH))
        for i in range(product_count):
            wait.until(EC.visibility_of_element_located((By.XPATH, PRODUCT_XPATH)))
            time.sleep(1)
            product = driver.find_elements(By.XPATH, PRODUCT_XPATH)[i]
            # hover over the product block
            actions.move_to_element(product).perform()
            # click the "more details" button
            product.find_element(By.XPATH, ".//p[contains(@class,'extra_button')]").click()
            # in the details page get the product sub-title containing the product size
            product_size = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div#item_current_sub_title"))).text
            # keep only the first line of the sub-title
            product_size = product_size.split('\n')[0]
            print(product_size)
            # get back to the main page
            driver.execute_script("window.history.go(-1)")
    finally:
        # Always close the browser, even if a wait times out or a click fails.
        driver.quit()
    

    And it prints the product sizes, e.g. גודל: 100 ML