Search code examples
python selenium-webdriver automation selenium-chromedriver rpa

Python & Selenium Code Bugs - Automation is NOT gathering data as expected


a. I have a simple automation program that I'm trying to learn from. It is a software robot that finds key information about important scientists and displays it to the user.

b. The program introduces itself and explains the steps it is about to take.

c. It navigates to the Wikipedia page of each scientist found in the list SCIENTISTS.

d. It retrieves the dates the scientists were born and died and calculates their age. It also retrieves the first paragraph of their Wikipedia page.

e. It displays all of this information to the user in an easily understood manner.

f. I don't want to use any Wikipedia API.

This is what I have in my folder:

In my main.py:

from robotics import Robot
from RPA.Browser.Selenium import Selenium as ChromeBrowser
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

# Scientists whose Wikipedia pages are visited, in this order.
SCIENTISTS = [
    "Albert Einstein",
    "Isaac Newton",
    "Marie Curie",
    "Charles Darwin",
]

# Module-level robot instance; introduce_yourself() greets through it.
robot = Robot("WikiRobot")


def introduce_yourself():
    """Print the greeting of the module-level ``robot``."""
    say_hello = robot.say_hello
    say_hello()


def main():
    """Visit each scientist's Wikipedia page and print the scraped details.

    For every name in ``SCIENTISTS`` this loads the matching English
    Wikipedia article in Chrome, waits for the birth date, death date and
    first paragraph, computes an approximate age and prints each field.
    A ``TimeoutException`` raised while waiting is reported and the loop
    moves on to the next scientist.
    """
    # NOTE(review): ``datetime`` and ``sleep`` are used below but neither is
    # among the imports shown for this file.

    # This local robot shadows the module-level ``robot`` of the same name.
    robot = Robot("WikiRobot")
    robot.say_hello()

    driver = webdriver.Chrome()

    for scientist in SCIENTISTS:
        # navigate to wikipedia page
        # (spaces in the name become underscores to form the article URL)
        driver.get(
            f"https://en.wikipedia.org/wiki/{scientist.replace(' ', '_')}")

        try:
            # wait for the birthDate element to be visible
            # NOTE(review): the reported run ends in the TimeoutException
            # branch for every scientist; presumably the date spans exist in
            # the page but are not displayed, so a visibility wait cannot
            # succeed within the 10 second limit - confirm.
            birth_date_element = WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located(
                    (By.CSS_SELECTOR, "span.bday"))
            )
            birth_date = birth_date_element.text

            # Same visibility wait, this time for the death-date span.
            death_date_element = WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located(
                    (By.CSS_SELECTOR, "span.dday"))
            )
            death_date = death_date_element.text

            # Both strings are expected in YYYY-MM-DD form; anything else
            # makes strptime raise ValueError, which is not caught below.
            born = datetime.strptime(birth_date, '%Y-%m-%d')
            died = datetime.strptime(death_date, '%Y-%m-%d')
            # Approximate age: elapsed days divided by 365.25, truncated.
            age = int((died - born).days / 365.25)

            # First <p> of the first <div> inside the article content.
            summary = WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located(
                    (By.XPATH, "//div[@id='mw-content-text']/div[1]/p[1]"))
            ).text

            # store information in dictionary
            scientist_info = {
                "Name": scientist,
                "Birth Date": birth_date,
                "Death Date": death_date,
                "Age": age,
                "Summary": summary
            }

            # display information
            # (one field per line, pausing one second after each)
            for key, value in scientist_info.items():
                print(f"{key}: {value}")
                sleep(1)

        except TimeoutException as e:
            # Only wait timeouts are handled; the loop then continues with
            # the next scientist.
            print(f"An error occurred while processing {scientist}: {e}")

    # Close the browser after the loop
    # NOTE(review): not in a ``finally`` block, so an uncaught exception
    # above leaves the browser open.
    driver.quit()


# Run the robot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

In my robotics.py:
xx

from RPA.Browser.Selenium import Selenium as ChromeBrowser


class Robot:
    """A named software robot that owns an RPA Selenium browser library.

    Attributes are set in ``__init__``: ``name`` is the text used in the
    greeting and farewell, ``browser`` is the ``ChromeBrowser`` (RPA
    ``Selenium``) library instance used to open pages.
    """

    def __init__(self, name):
        """Store the robot's name and create its browser library object.

        Args:
            name: Name printed by ``say_hello`` and ``say_goodbye``.
        """
        self.name = name
        self.browser = ChromeBrowser()

    def say_hello(self):
        """Print the robot's greeting."""
        print(f"Hello, my name is {self.name}")

    def say_goodbye(self):
        """Print the robot's farewell."""
        print(f"Goodbye, my name is {self.name}")

    def open_webpage(self, webpage):
        """Open ``webpage`` in a browser window.

        Args:
            webpage: URL to load.
        """
        # The RPA Selenium library object exposes its keywords as methods
        # and has no ``get``; ``open_available_browser`` starts a browser
        # and navigates to the URL in one call.
        self.browser.open_available_browser(webpage)

So far, when I run the code with python main.py in the terminal, the program does go through the scientists in Chrome one by one, but when it closes I'm not getting the data I want to show in the terminal. Instead, I'm left with the error messages below:

Hello, my name is WikiRobot

An error occurred while processing Albert Einstein: Message:

An error occurred while processing Isaac Newton: Message:

An error occurred while processing Marie Curie: Message:

An error occurred while processing Charles Darwin: Message:


Solution

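  • Use the main.py changes below.

    Replaced visibility_of_element_located with presence_of_element_located, read the dates through get_attribute("innerText") instead of .text, added the missing datetime and sleep imports, and updated a few locators.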

    from datetime import datetime
    from time import sleep
    
    from robotics import Robot
    from RPA.Browser.Selenium import Selenium as ChromeBrowser
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.common.exceptions import TimeoutException
    
    # Scientists whose Wikipedia pages are visited, in this order.
    SCIENTISTS = [
        "Albert Einstein",
        "Isaac Newton",
        "Marie Curie",
        "Charles Darwin",
    ]

    # Module-level robot instance; introduce_yourself() greets through it.
    robot = Robot("WikiRobot")
    
    
    def introduce_yourself():
        """Print the greeting of the module-level ``robot``."""
        say_hello = robot.say_hello
        say_hello()
    
    
    def _age_in_years(born, died):
        """Return the number of completed years between ``born`` and ``died``.

        Args:
            born: Birth date (any object exposing ``year``, ``month``, ``day``).
            died: Death date of the same kind.

        Returns:
            Whole years lived, counting a year only once its anniversary
            has been reached.
        """
        # Comparing (month, day) tuples tells whether the final birthday was
        # reached. Dividing a day count by 365.25 instead can drop a year:
        # the 365 days from 2001-01-01 to 2002-01-01 would truncate to 0.
        before_birthday = (died.month, died.day) < (born.month, born.day)
        return died.year - born.year - before_birthday


    def main():
        """Print birth date, death date, age and summary for each scientist.

        Opens Chrome, loads the English Wikipedia article of every name in
        ``SCIENTISTS`` and prints the collected fields, pausing one second
        after each. A scientist whose page elements are not found within the
        wait limit, or whose dates are not in ``YYYY-MM-DD`` form, is
        reported and skipped. The browser is always closed on exit.
        """
        robot = Robot("WikiRobot")
        robot.say_hello()

        driver = webdriver.Chrome()

        # try/finally so the browser is closed even when an unexpected error
        # escapes the loop.
        try:
            for scientist in SCIENTISTS:
                # navigate to wikipedia page
                driver.get(
                    f"https://en.wikipedia.org/wiki/{scientist.replace(' ', '_')}")

                try:
                    # The date element only has to be present in the DOM;
                    # innerText is read instead of .text so the value is
                    # available even when the element is not displayed.
                    birth_date_element = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located(
                            (By.CSS_SELECTOR, "span.bday"))
                    )
                    birth_date = birth_date_element.get_attribute("innerText")

                    # First span in the infobox cell next to the "Died" header.
                    death_date_element = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located(
                            (By.XPATH, "//th[contains(text(), 'Died')]/following-sibling::td//span"))
                    )
                    # The span text is wrapped in parentheses; strip them so
                    # only the YYYY-MM-DD date remains.
                    death_date = death_date_element.get_attribute(
                        "innerText").replace("(", "").replace(")", "")

                    born = datetime.strptime(birth_date, '%Y-%m-%d')
                    died = datetime.strptime(death_date, '%Y-%m-%d')
                    age = _age_in_years(born, died)

                    # First article paragraph that is not an empty placeholder.
                    summary = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located(
                            (By.XPATH, "(//div[@id='mw-content-text']/div/p[not(@class='mw-empty-elt')])[1]"))
                    ).text

                    # store information in dictionary
                    scientist_info = {
                        "Name": scientist,
                        "Birth Date": birth_date,
                        "Death Date": death_date,
                        "Age": age,
                        "Summary": summary
                    }

                    # display information
                    for key, value in scientist_info.items():
                        print(f"{key}: {value}")
                        sleep(1)

                except (TimeoutException, ValueError) as e:
                    # ValueError covers a date string strptime cannot parse,
                    # so one odd page does not abort the remaining scientists.
                    print(f"An error occurred while processing {scientist}: {e}")
        finally:
            # Close the browser after the loop
            driver.quit()
    
    
    # Run the robot only when this file is executed as a script, not on import.
    if __name__ == "__main__":
        main()
    

    It should print the output below; I have not pasted the complete summaries because they are long.

    Hello, my name is WikiRobot
    Name: Albert Einstein
    Birth Date: 1879-03-14
    Death Date: 1955-04-18
    Age: 76
    Summary: Albert Einstein (/ˈaɪnstaɪn/ EYEN-styne;[4] German: [ˈalbɛʁt ˈʔaɪnʃtaɪn] (listen); 14 March 1879 – 18 April 1955) was a German-born theoretical phys...
    Name: Isaac Newton
    Birth Date: 1643-01-04
    Death Date: 1727-03-31
    Age: 84
    Summary: Sir Isaac Newton FRS (25 December 1642 – 20 March 1726/27)[a] was an English mathematician, physicist, astronomer, alchemist, theologian, and au...
    Name: Marie Curie
    Birth Date: 1867-11-07
    Death Date: 1934-07-04
    Age: 66
    Summary: Marie Salomea Skłodowska–Curie (/ˈkjʊəri/ KURE-ee,[4] French pronunciation: [maʁi kyʁi], Polish pronunciation: [ˈmarja skwɔˈdɔfska kʲiˈri]; born M...
    Name: Charles Darwin
    Birth Date: 1809-02-12
    Death Date: 1882-04-19
    Age: 73
    Summary: Charles Robert Darwin FRS FRGS FLS FZS JP[6] (/ˈdɑːrwɪn/[7] DAR-win; 12 February 1809 – 19 April 1882) was an English naturalist, geologist, and b...