Search code examples
pythonseleniumxpathcss-selectorswebdriverwait

How to find the product name and price using the python scraper?


I am writing a Selenium-based web scraper in Python, and it keeps throwing:

no such element: unable to locate element

This happens even though I can see the element in the Selenium browser that is launched. Here is the link it keeps failing on: https://www.neimanmarcus.com/p/givenchy-g-chain-ring-prod250190244?childItemId=NMY5X1R_&navpath=cat000000_cat4870731_cat50910737_cat2650734&page=0&position=11

Here is my code for the driver:

def getDriver():
    """Create a maximized Chrome WebDriver with automation hints suppressed.

    Returns:
        The started ``webdriver.Chrome`` instance, or ``None`` if the driver
        could not be created (the error is printed, not raised).
    """
    try:
        options = webdriver.ChromeOptions()
        options.add_argument("start-maximized")
        # Both switches must go in ONE call: add_experimental_option() stores
        # the value under its name, so a second call with 'excludeSwitches'
        # would replace the first and drop "enable-automation".
        options.add_experimental_option(
            "excludeSwitches", ["enable-automation", "enable-logging"]
        )
        options.add_experimental_option('useAutomationExtension', False)
        # `options=` is the supported keyword; `chrome_options=` is deprecated.
        driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
        return driver
    except Exception as error:
        # Best-effort: report the failure and let the caller handle None.
        traceback.print_exc()
        print(error)
        return None

Here is the scraper:

def getProduct(domain, url):
    """Scrape the product title and price from a product page.

    Args:
        domain: Name used to locate the markup file ``markups/<domain>.yml``.
        url: Address of the product page to load.

    Returns:
        A dict with ``'price'`` (and ``'title'`` when found) holding each
        element's ``innerHTML``, or ``False`` when no driver could be started
        or no non-empty price was found.

    Raises:
        OSError: If the markup file cannot be opened.
    """
    driver = getDriver()
    if driver is None:
        # getDriver() already reported the failure; there is nothing to scrape with.
        return False

    try:
        # find the yaml file for the domain
        markupPath = 'markups/%s.yml' % domain
        with open(markupPath, 'r') as file:
            # NOTE(review): the parsed markup is not used below; the XPaths
            # are hard-coded. Presumably they should come from this file.
            yamlElements = yaml.safe_load(file)

        # NOTE(review): these are generated class names and may differ when
        # the site serves a different variant of the page - confirm.
        titleXpath = '//span[contains(concat(" ",normalize-space(@class)," ")," Titlestyles__ProductName-fRyAwr ")]'
        priceXpath = '//span[contains(concat(" ",normalize-space(@class)," ")," Pricingstyles__RetailPrice-eYMMwV ")]'
        print("XXXXXXPATHHHHHHHHHHHHHS: ",titleXpath, priceXpath)
        driver.get(url)
        driver.implicitly_wait(10)

        try:
            title = driver.find_element(By.XPATH, titleXpath)
        except Exception as error:
            print("ERROR: ", error)
            title = None

        try:
            # Same locator API as the title lookup; find_element_by_xpath is
            # deprecated and absent from newer Selenium releases.
            price = driver.find_element(By.XPATH, priceXpath)
            print(price)
        except Exception as error:
            print("ERRROR: ", error)
            price = None

        # Stop any further page loading before reading the elements.
        driver.execute_script("window.stop();")

        data = {}
        if title is not None:
            titleHtml = title.get_attribute('innerHTML')
            print(titleHtml)
            data['title'] = titleHtml

        if price is not None:
            priceHtml = price.get_attribute('innerHTML')
            print(priceHtml)
            data['price'] = priceHtml

        # A missing, None or empty price counts as a failed scrape.
        if not data.get('price'):
            return False
        return data
    finally:
        # Always release the browser, including when an exception propagates.
        driver.quit()

I have been stuck on this for a while and can't figure out why it is throwing this error:
  Message: no such element: Unable to locate element: {"method":"xpath","selector":"//span[contains(concat(" ",normalize-space(@class)," ")," Pricingstyles__RetailPrice-eYMMwV ")]"}

Solution

  • It appears that the website automatically redirects visitors according to the country determined from the visitor's IP address, and it changes the class names for the item name and price accordingly. Here is a more robust solution, which accounts for such changes and also waits for the elements to load on the page:

    from selenium import webdriver
    from selenium.webdriver.chrome.service import Service
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    
    
    chrome_options = Options()
    chrome_options.add_argument("--no-sandbox")

    webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
    browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)

    url='https://www.neimanmarcus.com/en-jp/p/givenchy-g-chain-ring-prod250190244?childItemId=NMY5X1R_&navpath=cat000000_cat4870731_cat50910737_cat2650734&page=0&position=11'

    try:
        browser.get(url)

        # Dismiss the pop-up if one shows up within 20 seconds; otherwise carry on.
        try:
            WebDriverWait(browser, 20).until(EC.element_to_be_clickable((By.ID, "closeButton"))).click()
        except Exception:
            print('no pop-up, moving on')

        # Locate by data-test attributes rather than class names, and wait
        # (up to 20 seconds each) for the elements to become clickable.
        title = WebDriverWait(browser, 20).until(EC.element_to_be_clickable((By.XPATH, "//span[@data-test='pdp-title']")))
        price = WebDriverWait(browser, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[@data-test='pdp-pricing']")))
        print(title.text.strip(), price.text.strip())
    finally:
        # Close the browser and end the driver session even if a wait times out.
        browser.quit()
    

    Result:

    G-Chain Ring JPY 46709