Search code examples
selenium-webdriverweb-scrapingxpathpagination

Selenium Pagination Error when scraping clutch.co


For some reason this clutch.co scraper isn't clicking the "next" button and navigating to the next page. So when I run this code it'll only get information from the first page and then close itself.

I added in waits to allow the page to load but it hasn't helped. When watching the browser you can see it scrolls to the bottom of the page but then closes itself.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

website = "https://clutch.co/us/web-developers"
options = webdriver.ChromeOptions()
options.add_experimental_option("detach", False)

driver = webdriver.Chrome(options=options)
driver.get(website)

wait = WebDriverWait(driver, 10)
company_elements = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'provider-info')))

#pagination
pagination = driver.find_element(By.XPATH,'//ul[@class="pagination justify-content-center"]')
pages = pagination.find_elements(By.TAG_NAME,'li')
last_page = int(250)



company_names = []
taglines = []
locations = []
costs = []
ratings = []

current_page = 1


while current_page <= last_page:
    company_elements = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'provider-info')))

    for company_element in company_elements:
        company_name = company_element.find_element(By.CLASS_NAME, "company_info").text
        company_names.append(company_name)


        tagline = company_element.find_element(By.XPATH,'.//p[@class="company_info__wrap tagline"]').text
        taglines.append(tagline)

        rating = company_element.find_element(By.XPATH,'.//span[@class="rating sg-rating__number"]').text
        ratings.append(rating)

        location = company_element.find_element(By.XPATH, './/span[@class="locality"]').text
        locations.append(location)

        cost = company_element.find_element(By.XPATH, './/div[@class="list-item block_tag custom_popover"]').text
        costs.append(cost)

    current_page = current_page + 1

    try:
        next_page = driver.find_element(By.XPATH,'//li[@class="page-item next"]/a[@class="page-link"]")')
        next_page.click()
        time.sleep(10)
    except:
        break

driver.close()



data = {'Company_Name': company_names, 'Tagline': taglines, 'location': locations, 'Ticket_Price': costs, 'Rating': ratings}
df = pd.DataFrame(data)
df.to_csv('companies_test1.csv', index=False)
print(df)



Solution

  • Your XPath is wrong, use:

    next_page = driver.find_element(By.XPATH,'//li[@class="page-item next"]/a[@class="page-link"]')
    

    But the website block it. If you remove the try/catch, you can read error:

    selenium.common.exceptions.ElementClickInterceptedException:
    Message: element click intercepted: Element
    <a class="page-link" data-page="1" href="/us/web-developers?pag e=1" data-link="?page=1">...</a>
    is not clickable at point (622, 888).
    Other element would receive the click: 
    <div id="CybotCookiebotDialogBodyButtons" style="padding-left: 0px;">...</div>
    

    A better code, but my IP/settings require Cloudfare captcha:

    next_page = driver.find_element(By.XPATH,'//li[@class="page-item next"]/a[@class="page-link"]')
    np = next_page.get_attribute('href')
    driver.get(np)
    time.sleep(6)