Tags: python, selenium, selenium-webdriver, webdriverwait

Selenium - element click intercepted: Element is not clickable at point


I'm trying to scrape a site: I collect links from the main category pages and then scrape every page individually. This error occurs sometimes, but not always. I've tried using WebDriverWait and scrolling to the end of the page, but I still get this error intermittently.

How can I ensure Selenium will only click the button after it has loaded?

init.py

import scraper

urls_globo = [
    # 'https://g1.globo.com/brasil', # TODO: home page, keep it?
    'https://g1.globo.com/ciencia', 
    'https://g1.globo.com/mundo',   
    'https://g1.globo.com/politica',
    'https://g1.globo.com/saude',   
    'https://g1.globo.com/tecnologia'
]

for url in urls_globo:
    print('\nCATEGORIA: ' + url.split('/')[-1])
    navegador = scraper.GetPages(url)
    links = scraper.GetLinksFromPage(navegador, 20)
    for link in links:
        print(link)

scraper.py

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

opcoes = Options()
opcoes.add_argument('--ignore-certificate-errors')
opcoes.add_argument('--ignore-ssl-errors')
# opcoes.headless = True

# disabling ads: install an ad blocker in the browser

navegador = webdriver.Chrome('C:/Users/julia/repos/pos/novo/chromedriver.exe', options=opcoes)
# espera = WebDriverWait(navegador, 10)

def GetPages(url):
    try:
        navegador.get(url)
    except Exception as e:
        raise SystemExit(e)
    
    return navegador

def GetLinksFromPage(navegador, itens_meta):

    espera = WebDriverWait(navegador, 20)
    
    links = []
    #itens_meta = 15


    while itens_meta > len(links):

        #1 - scroll down the page until the button appears for the first time
        navegador.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        #3 - click the button to load more content
        espera.until(EC.element_to_be_clickable((By.CLASS_NAME, 'load-more'))).click()


        #2 - collect the links from all loaded elements
        espera.until(EC.visibility_of_all_elements_located((By.CLASS_NAME, 'feed-post-body')))
        #elementos = navegador.find_elements(by=By.CLASS_NAME, value='feed-post-link')
        elementos = espera.until(EC.visibility_of_all_elements_located((By.CLASS_NAME, 'feed-post-link')))
        temp_links = [elem.get_attribute('href') for elem in elementos]

        #4 - update the list that controls the loop
        links.extend(temp_links)  # extend, not append: append would nest lists, so len(links) would count pages instead of links

    # print(links)
    # print(len(links))
    #navegador.quit()
    return links

stacktrace

(screenshot of the ElementClickInterceptedException stack trace)


Solution

  • That page can be scraped without the overhead and complexity of Selenium: you can use requests/bs4 instead:

    import requests
    from bs4 import BeautifulSoup
    
    headers= {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'}
    s = requests.Session()
    s.headers.update(headers)
    for x in range(1, 10):  # adjust the page range as needed
        r = s.get(f'https://g1.globo.com/ciencia/index/feed/pagina-{x}.ghtml')
        soup = BeautifulSoup(r.text, 'html.parser')
        news = soup.select('div.feed-post-body')
        for n in news:
            title = n.select_one('a')
            print(title.get_text(strip=True))
    

    This returns the titles, but you can select any other elements:

    O supertelescópio que vai investigar a origem das estrelas
    Pesquisa liga beijos na Idade do Bronze a origem da herpes labial
    Os animais que fazem arte - e podem ter vantagens evolutivas com isso
    O que é a hipótese de Gaia, que defende que a Terra 'está viva'
    Septuagenárias e rebeldes
    
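    Since the original goal was to collect the article links rather than the titles, a small variation of the same loop (reusing the feed-post-link class that appears in the question's own Selenium code) could look like this:

    for n in news:
        link = n.select_one('a.feed-post-link')
        if link:  # skip feed items that carry no post link
            print(link.get('href'))
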

    If you are keen on using Selenium, then bear in mind that the page loads the first three pages' worth of news when it detects scrolling to the bottom; only after that can you click the button that takes you to page 4. You also need to dismiss the cookie banner, wait for the heavy JavaScript adverts to load, account for the page's URL changing when the button is clicked, and re-locate the elements after each load, as in the sketch below.
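
    A minimal sketch of that flow, assuming Selenium 4 (which resolves the chromedriver itself) and reusing the load-more and feed-post-link classes from the question; the cookie-banner selector below is a guess and must be taken from the live page. A JavaScript click is used so an overlay cannot intercept the native click:

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    navegador = webdriver.Chrome()  # on Selenium 3, pass the chromedriver path instead
    navegador.get('https://g1.globo.com/ciencia')
    espera = WebDriverWait(navegador, 20)

    # dismiss the cookie banner once, if it shows up
    # ('.cookie-banner-lgpd_accept-button' is hypothetical; inspect the page for the real selector)
    try:
        espera.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, '.cookie-banner-lgpd_accept-button'))).click()
    except Exception:
        pass

    links = []
    while len(links) < 20:
        # scroll to the bottom: the first three pages lazy-load on scroll, then the button renders
        navegador.execute_script('window.scrollTo(0, document.body.scrollHeight);')
        try:
            botao = WebDriverWait(navegador, 5).until(
                EC.element_to_be_clickable((By.CLASS_NAME, 'load-more')))
            # a JavaScript click is not intercepted by elements overlapping the button
            navegador.execute_script('arguments[0].click();', botao)
        except Exception:
            pass  # no button yet: the page is still auto-loading on scroll
        # re-locate the posts after every load; old references go stale
        elementos = espera.until(EC.presence_of_all_elements_located(
            (By.CLASS_NAME, 'feed-post-link')))
        links = [elem.get_attribute('href') for elem in elementos]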