Search code examples
python selenium-webdriver selenium-chromedriver translate

Page translate feature is not working as expected in Python using Selenium automation script


Issue Description:

I am trying to automate a process in which the script visits a website, hovers over the navigation menu bar, clicks each category option in the tier 1 dropdown, visits that page, scrapes the details of the top 20 products on it, and writes them to an Excel file. If a page does not contain any products, the script keeps scrolling down until it reaches the end of the page; if no product div is found, it goes back to the top of the page and clicks the next category in the navigation panel.

Function Definitions:

scroll_and_click_view_more function is for scrolling down the page, prod_vitals function is for scraping product details specific to each page, and prod_count function is for extracting total count of products on each page and creating a summary of all pages.

Error Description:

As the website is in Japanese, I want to translate each page into English when I open it and then perform the scraping. I have written a function, translate_page, to translate the page, and I call it from my scrape function every time I open a new page. The code otherwise works as expected; the only issue is that I am still getting all the results in Japanese instead of English.

Screenshot of my output in the excel file below. All the navigation tab names and product names are in Japanese language. I want them to be translated to English before scraping from the website

Output

I am attaching the class 'WebScraper' below where I have written the function for translation. I have removed the rest of the function definitions for now for clarity. Will add them later on if required.

class WebScraper:
    """Scrape every category page reachable from the site's navigation bar.

    The landing page stays open in the first browser window. Each navigation
    link is opened in a separate tab, processed by the module-level helpers
    (``scroll_and_click_view_more``, ``prod_vitals``, ``prod_count``) and the
    tab is closed again, so the link list can be re-read from the landing
    page on every iteration.
    """

    def __init__(self):
        """Start Chrome through the chromedriver binary at a fixed local path."""
        self.url = "https://staging1-japan.coach.com/?auto=true"
        # Earlier attempts, left disabled:
        #self.driver = webdriver.Chrome()
        #options = Options()
        #options.add_argument("--lang=en")
        #self.driver = webdriver.Chrome(service=Service(r"c:\Users\DELL\Documents\Self_Project\chromedriver.exe"), options=options)
        options = Options()
        options.add_argument("--remote-debugging-port=9222")
        self.driver = webdriver.Chrome(service=Service(r"c:\Users\DELL\Documents\Self_Project\chromedriver.exe"), options=options)

    def translate_page(self):
        """Make later documents in the current tab report English as preferred language.

        Registers a script through the DevTools command
        ``Page.addScriptToEvaluateOnNewDocument`` that overrides
        ``navigator.languages`` with ``['en-US', 'en']``. The script is
        evaluated only for documents created after this call, so the page
        that is already loaded is not affected, and this method does not by
        itself translate any text.

        No ``<meta name="google" content="notranslate">`` tag is injected:
        that tag asks Google/Chrome *not* to translate the page, which is the
        opposite of what this method is meant to achieve.
        """
        self.driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
            "source": """
            Object.defineProperty(navigator, 'languages', {
                get: function() { return ['en-US', 'en']; }
            });
            """
        })

    def _close_tab(self, handle):
        """Close the tab identified by *handle* if it is still open."""
        if handle in self.driver.window_handles:
            self.driver.switch_to.window(handle)
            self.driver.close()

    def scrape(self):
        """Open each navigation link in its own tab and scrape the page behind it.

        The loop ends when every link has been visited or when the navigation
        container can no longer be found on the landing page. A failure on one
        link is reported and the loop moves on to the next link. Only the tab
        opened for the current link is ever closed; the landing-page window is
        left open for the caller to quit.
        """
        self.driver.get(self.url)
        #self.driver.maximize_window()
        time.sleep(5)
        nav_count = 0
        main_window = self.driver.window_handles[0]
        while True:
            # Deliberately outside the try: if the landing-page window is gone
            # there is nothing left to iterate over, so let the error surface
            # instead of looping forever.
            self.driver.switch_to.window(main_window)
            href = None
            new_tab = None
            try:
                self.driver.execute_script("window.scrollBy(0, 100);")
                # Refresh the page source and parse it
                soup = BeautifulSoup(self.driver.page_source, 'html.parser')
                nav_container = soup.find('div', {'class': 'css-wnawyw'})
                if nav_container is None:
                    print("Navigation container not found; stopping.")
                    break
                links = nav_container.find_all('a', {'class': 'css-ipxypz'})
                hrefs = [link.get('href') for link in links]
                if nav_count >= len(hrefs):
                    # Every navigation link has been processed.
                    break
                href = hrefs[nav_count]
                time.sleep(2)
                element1 = WebDriverWait(self.driver, 30).until(EC.presence_of_element_located((By.CSS_SELECTOR, f'a[href="{href}"]')))
                self.driver.execute_script("window.scrollTo(0, arguments[0].getBoundingClientRect().top + window.scrollY - 100);", element1)
                time.sleep(5)
                # Pass href as an argument rather than splicing it into the
                # script text, so quotes in the URL cannot break the script.
                self.driver.execute_script("window.open(arguments[0], '_blank');", href)
                time.sleep(3)
                opened = [h for h in self.driver.window_handles if h != main_window]
                if not opened:
                    # Without this check the landing page itself would be
                    # scraped and closed.
                    print(f"Could not open a new tab for {href}")
                    continue
                new_tab = opened[-1]
                self.driver.switch_to.window(new_tab)
                time.sleep(3)
                self.translate_page()  # Translate the new page
                response = scroll_and_click_view_more(self.driver, href)
                time.sleep(3)
                if response in ("No product tiles found", "Reached the end of the page."):
                    # Nothing to scrape on this page.
                    self.driver.execute_script("window.scrollTo(0,0);")
                    time.sleep(3)
                    continue
                soup = BeautifulSoup(response, 'html.parser')
                plp_title = links[nav_count].get('title')
                prod_vitals(soup, plp_title, self.url)
                time.sleep(5)
                prod_count(soup, plp_title)
                self.driver.execute_script("window.scrollBy(0, -500);")
                time.sleep(2)
            except TimeoutException:
                print(f"Element with href {href} not clickable")
                self.driver.save_screenshot('timeout_exception.png')
            except Exception as e:
                print(f"An error occurred: {e}")
            finally:
                # Advance even after a failure so a broken link is skipped
                # rather than retried forever.
                nav_count += 1
                if new_tab is not None:
                    self._close_tab(new_tab)
# Run the scraper, then always shut the browser down, even when scrape()
# raises, so no Chrome/chromedriver process is left behind.
scraper = WebScraper()
try:
    scraper.scrape()
    time.sleep(5)
finally:
    scraper.driver.quit()

Solution

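  • The modifications below worked for me: in __init__, pass --lang=en to Chrome and enable automatic Japanese-to-English translation through the Chrome preferences.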

    def __init__(self):
        """Start Chrome with English UI language and page translation enabled.

        The translation behaviour is configured through Chrome profile
        preferences passed with the ``prefs`` experimental option.
        """
        self.url = "https://staging1-japan.coach.com/?auto=true"
        options = Options()
        options.add_argument("--remote-debugging-port=9222")
        # Sets the browser UI language to English.
        options.add_argument("--lang=en")
        prefs = {
            # Source language -> target language mapping for translation.
            # NOTE(review): these preference names are Chrome-internal and may
            # differ between Chrome versions; verify against the build in use.
            "translate_whitelists": {"ja": "en"},
            # A boolean preference: pass a real bool, not the string "true".
            "translate": {"enabled": True},
        }
        options.add_experimental_option("prefs", prefs)
        self.driver = webdriver.Chrome(service=Service(r"c:\Users\DELL\Documents\Self_Project\chromedriver.exe"), options=options)
    def translate_page(self):
        """Make later documents in the current tab report English as preferred language.

        Registers a script through the DevTools command
        ``Page.addScriptToEvaluateOnNewDocument`` that overrides
        ``navigator.languages`` with ``['en-US', 'en']``. The script is
        evaluated only for documents created after this call, so the page
        that is already loaded is not affected.

        No ``<meta name="google" content="notranslate">`` tag is injected:
        that tag asks Google/Chrome *not* to translate the page, which would
        work against the translation this method is meant to support.
        """
        self.driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
            "source": """
            Object.defineProperty(navigator, 'languages', {
                get: function() { return ['en-US', 'en']; }
            });
            """
        })