Search code examples
pythonfor-loopweb-scrapingexceptnosuchelementexception

Can't continue list with except pass


My code has to continue if the exception is raised (and then look at the next item in the list), but I'm getting an error...

I tried many things, but none of them worked. This is the code I have now:

        def process_cpf_list(self):
            """Scrape every CPF found in the sheet and write the results back.

            Reads the CPF column (dropping its first cell), hands the values to
            ``BOT``, and writes each scraped field into the matching column of
            the sheet, one row per scraped result.
            """
            # [1:] drops the first cell of the column (presumably a header row
            # -- confirm against the sheet layout).
            cpfs = self.sheet.col_values(self.cpf_col)[1:]

            bot_url = BOT(cpfs)

            # NOTE: this call is OUTSIDE the try block below, so any exception
            # raised while scraping propagates from here and is not caught by
            # the `except NoSuchElementException` handler.
            nomes, idades, beneficios, concessoes, salarios, bancoss, bancoscard, consigs, cards = bot_url.search_cpfs()
            print("Atualizando...")

            for i in range(len(nomes)):
                try:
                    # i + 2 maps list index 0 to sheet row 2, matching the
                    # [1:] slice above that skipped the first cell.
                    self.sheet.update_cell(i + 2, self.nome_col, nomes[i])
                    self.sheet.update_cell(i + 2, self.age_col, idades[i])
                    self.sheet.update_cell(i + 2, self.beneficio_col, beneficios[i])
                    self.sheet.update_cell(i + 2, self.concessao_col, concessoes[i])
                    self.sheet.update_cell(i + 2, self.salario_col, salarios[i])
                    self.sheet.update_cell(i + 2, self.bancos_col, bancoss[i])
                    self.sheet.update_cell(i + 2, self.bancocard_col, bancoscard[i])
                    self.sheet.update_cell(i + 2, self.consig_col, consigs[i])
                    self.sheet.update_cell(i + 2, self.card_col, cards[i])

                # This handler only covers the update_cell calls above.
                except NoSuchElementException:
                    print('CPF Invalido')
                    pass

# Script entry: build the updater and run one pass over the CPF list.
# NOTE(review): 'TESTE' is presumably the spreadsheet name -- confirm against CpfSearch.
cpf_updater = CpfSearch('TESTE')
cpf_updater.process_cpf_list()

And it's giving me this error:

Traceback (most recent call last):
  File "C:/Users/MOISA/PycharmProjects/inss2/cpf_updater.py", line 54, in <module>
    cpf_updater.process_cpf_list()
  File "C:/Users/MOISA/PycharmProjects/inss2/cpf_updater.py", line 34, in process_cpf_list
    nomes, idades, beneficios, concessoes, salarios, bancoss, bancoscard, consigs, cards = bot_url.search_cpfs()
  File "C:\Users\MOISA\PycharmProjects\inss2\k_bot.py", line 66, in search_cpfs
    nome = self.driver.find_element_by_xpath("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/h2").text
  File "C:\Users\MOISA\PycharmProjects\inss2\venv\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 394, in find_element_by_xpath
    return self.find_element(by=By.XPATH, value=xpath)
  File "C:\Users\MOISA\PycharmProjects\inss2\venv\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 978, in find_element
    'value': value})['value']
  File "C:\Users\MOISA\PycharmProjects\inss2\venv\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "C:\Users\MOISA\PycharmProjects\inss2\venv\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: Unable to locate element: /html/body/main[1]/div[1]/div[1]/div[1]/div[1]/h2

Here's the search_cpfs:

def search_cpfs(self):
    """Look up every CPF in ``self.cpfs`` on the site and collect the scraped fields.

    Returns:
        A 9-tuple of parallel lists, in this order: nomes, idades,
        beneficios, concessoes, salarios, bancoss, bancoscard, consigs,
        cards. A CPF that reaches the ``continue`` below contributes no
        entry to any list.
    """
    nomes = []
    idades = []
    beneficios = []
    concessoes = []
    salarios = []
    bancoss = []
    bancoscard = []
    consigs = []
    cards = []

    for cpf in self.cpfs:
        print(f"Procurando {cpf}.")

        # Reload the search page for each CPF.
        self.driver.get(self.bot_url)

        cpf_input = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[1]/input')
        cpf_input.send_keys(cpf)

        time.sleep(2)

        cpfButton = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[2]/button')
        cpfButton.click()

        time.sleep(2)

        self.delay = 3  # seconds

        # NOTE: every lookup below runs BEFORE the try block further down, so a
        # NoSuchElementException raised by any of them is not caught there and
        # propagates out of this method.
        nome = self.driver.find_element_by_xpath("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/h2").text
        idade = self.driver.find_element_by_xpath("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/ul/li[2]").text
        # Capture the text between "(" and "Anos" in the scraped string.
        age = re.search(r'\((.*?)Anos', idade).group(1)
        beneficio = self.driver.find_element_by_xpath(
            "/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/div[5]/span/b").text
        concessao = self.driver.find_element_by_xpath("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/div[2]/span").text
        salario = self.driver.find_element_by_xpath(
            "/html/body/main[1]/div[1]/div[2]/div/div[3]/div[1]/div[1]/span").text
        bancos = self.driver.find_element_by_xpath('//*[@id="loans"]').text
        # Collect each word that directly follows "Banco " and join them with commas.
        bancosw = re.findall(r'(?<=Banco )(\w+)', bancos)
        bankslist = ', '.join(bancosw)
        bancocard = self.driver.find_element_by_xpath('//*[@id="cards"]').text
        bcardw = re.findall(r'(?<=Banco )(\w+)', bancocard)
        bcardlist = ', '.join(bcardw)
        consig = self.driver.find_element_by_xpath("/html/body/main[1]/div[1]/div[1]/div[3]/div[2]/span").text
        card = self.driver.find_element_by_xpath("/html/body/main[1]/div[1]/div[1]/div[3]/div[3]/span").text

        try:
            # Wait up to self.delay seconds for the result heading to be present.
            # NOTE(review): Selenium documents WebDriverWait.until as raising
            # TimeoutException on timeout, so the handler below may never
            # match a failed wait -- confirm.
            WebDriverWait(self.driver, self.delay).until(
                EC.presence_of_element_located((By.XPATH, '//*[@id="main"]/div[1]/h2')))
            print('CPF Valido')

            print(nome, age, beneficio, concessao, salario, bankslist, bcardlist, consig, card)

        except NoSuchElementException:
            print('CPF Invalido')
            # Skip the appends below and move on to the next CPF.
            continue

        nomes.append(nome)
        idades.append(age)
        beneficios.append(beneficio)
        concessoes.append(concessao)
        salarios.append(salario)
        bancoss.append(bankslist)
        bancoscard.append(bcardlist)
        consigs.append(consig)
        cards.append(card)

    return nomes, idades, beneficios, concessoes, salarios, bancoss, bancoscard, consigs, cards

The page works like this:

1- If the client code is OK, the page redirects and shows some information that I can already scrape;

2- If the client code does not have all of its digits, the "search" button does nothing;

3- If the client code has all of its digits but something in it is wrong, the page shows a popup.


Solution

  • The error is raised inside bot_url.search_cpfs(), so you need to move that call inside the try except. In your case:

    def process_cpf_list(self):
        """Scrape every CPF listed in the sheet and write the results back.

        The call to ``search_cpfs()`` is placed inside the ``try`` block
        because that is where ``NoSuchElementException`` is raised. When the
        scrape raises it, the handler prints a message and nothing is written
        to the sheet.
        """
        cpf_values = self.sheet.col_values(self.cpf_col)[1:]
        scraper = BOT(cpf_values)

        try:
            # This is the call that raises the error.
            (nomes, idades, beneficios, concessoes, salarios,
             bancoss, bancoscard, consigs, cards) = scraper.search_cpfs()
            print("Atualizando...")

            # Column attribute name paired with the scraped values for it,
            # listed in the order the cells are written for each row.
            targets = (
                ("nome_col", nomes),
                ("age_col", idades),
                ("beneficio_col", beneficios),
                ("concessao_col", concessoes),
                ("salario_col", salarios),
                ("bancos_col", bancoss),
                ("bancocard_col", bancoscard),
                ("consig_col", consigs),
                ("card_col", cards),
            )

            # You may want a different error handler around the sheet updates.
            for index in range(len(nomes)):
                row = index + 2
                for column_attr, values in targets:
                    self.sheet.update_cell(row, getattr(self, column_attr), values[index])
        except NoSuchElementException:
            print('CPF Invalido')
    

    UPDATE

    Inside your search_cpfs method, you just wrap all of the find_element_by_xpath lines inside try and except:

    # def search_cpfs()
    # ....
    for cpf in self.cpfs:
        print(f"Procurando {cpf}.")

        self.driver.get(self.bot_url)

        # Every element lookup now lives inside the try block, so a missing
        # element on an invalid CPF is handled here instead of propagating
        # out of search_cpfs().
        try:
            cpf_input = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[1]/input')
            cpf_input.send_keys(cpf)

            time.sleep(2)

            cpfButton = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[2]/button')
            cpfButton.click()

            time.sleep(2)

            self.delay = 3  # seconds

            nome = self.driver.find_element_by_xpath("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/h2").text
            idade = self.driver.find_element_by_xpath("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/ul/li[2]").text
            age = re.search(r'\((.*?)Anos', idade).group(1)
            beneficio = self.driver.find_element_by_xpath(
                "/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/div[5]/span/b").text
            concessao = self.driver.find_element_by_xpath("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/div[2]/span").text
            salario = self.driver.find_element_by_xpath(
                "/html/body/main[1]/div[1]/div[2]/div/div[3]/div[1]/div[1]/span").text
            bancos = self.driver.find_element_by_xpath('//*[@id="loans"]').text
            bancosw = re.findall(r'(?<=Banco )(\w+)', bancos)
            bankslist = ', '.join(bancosw)
            bancocard = self.driver.find_element_by_xpath('//*[@id="cards"]').text
            bcardw = re.findall(r'(?<=Banco )(\w+)', bancocard)
            bcardlist = ', '.join(bcardw)
            consig = self.driver.find_element_by_xpath("/html/body/main[1]/div[1]/div[1]/div[3]/div[2]/span").text
            card = self.driver.find_element_by_xpath("/html/body/main[1]/div[1]/div[1]/div[3]/div[3]/span").text

            # NOTE(review): Selenium documents WebDriverWait.until as raising
            # TimeoutException on timeout; consider catching it as well -- confirm.
            WebDriverWait(self.driver, self.delay).until(
                    EC.presence_of_element_located((By.XPATH, '//*[@id="main"]/div[1]/h2')))
            print('CPF Valido')

            print(nome, age, beneficio, concessao, salario, bankslist, bcardlist, consig, card)

        except NoSuchElementException:
            # Invalid CPF: record nothing for it and move on to the next one.
            print('CPF Invalido')
            continue

        # Still inside the loop: reached only when every lookup above
        # succeeded, so each valid CPF adds exactly one entry to every list.
        nomes.append(nome)
        idades.append(age)
        beneficios.append(beneficio)
        concessoes.append(concessao)
        salarios.append(salario)
        bancoss.append(bankslist)
        bancoscard.append(bcardlist)
        consigs.append(consig)
        cards.append(card)
    

    UPDATE 2

    By the way, in your question, the one that raises the error is this line:

    nome = self.driver.find_element_by_xpath("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/h2").text
    

    So if you are certain that none of the other lines will raise an error, you can wrap just that line in the try/except instead of including all the others. I have simplified it below:

    for cpf in self.cpfs:
        # other codes here
        # ....
        cpf_input = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[1]/input')
        cpf_input.send_keys(cpf)

        # other codes here
        # ....

        try:
            # This is the line that raises the error, so you can wrap this line only
            # But to be safe, you can follow the previous one
            nome = self.driver.find_element_by_xpath("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/h2").text
        except NoSuchElementException:
            # Invalid CPF: skip the rest of this iteration, including the appends.
            print('CPF Invalido')
            continue

        # other code follows...

        # Still inside the loop, so each valid CPF adds one entry per list.
        nomes.append(nome)
        idades.append(age)
        beneficios.append(beneficio)
        # ...followed by the remaining appends from the original loop