Search code examples
javascript python html selenium browser

Selenium won't work unless I actually look at the Web page (perhaps anti-crawler mechanism by JavaScript?)


The following code works fine ONLY while I am looking at the web page (i.e. while the Chrome tab being controlled by Selenium is the one in the foreground).

Is there a way to make it work even when I'm browsing another tab/window?

(I wonder how the website knows I'm actually looking at the web page or not...)

#This is a job website in Japanese
login_url = "https://mypage.levtech.jp/"

driver = selenium.webdriver.Chrome("./chromedriver")

#Account and password are required to log in.
#I logged in and got to the following page, which displays a list of companies that I have applied for:
#https://mypage.levtech.jp/recruits/screening

#Upper bound on how long to poll for an element's text. Without it the polling
#loops below would spin forever whenever the text stays empty.
POLL_TIMEOUT_SECONDS = 30
POLL_INTERVAL_SECONDS = 0.1


def _wait_for_text(locate, description):
    """Re-run ``locate`` until the element it returns has non-empty text.

    Args:
        locate: Zero-argument callable that looks the element up again and
            returns it. It is called once per polling attempt.
        description: Human-readable label used in the timeout message.

    Returns:
        The first located element whose ``text`` is not the empty string.

    Raises:
        TimeoutError: If the text is still empty after POLL_TIMEOUT_SECONDS.
    """
    deadline = time.monotonic() + POLL_TIMEOUT_SECONDS
    while True:
        #Look the element up again on every attempt instead of reusing a stale reference
        candidate = locate()
        if candidate.text != "":
            return candidate
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"No text appeared in {description} within {POLL_TIMEOUT_SECONDS} seconds"
            )
        time.sleep(POLL_INTERVAL_SECONDS)


#Dictionary to store company names and their job postings
jobs = {}


for i, company in enumerate(company_names):
    time.sleep(1)

    #Polling is there because the webpage seems to take a while to load
    element = _wait_for_text(
        lambda i=i: driver.find_elements_by_class_name("ScreeningRecruits_ListItem")[i],
        f"list item {i}",
    )
    td = _wait_for_text(
        lambda element=element: element.find_element_by_tag_name("td"),
        f"the first cell of list item {i}",
    )

    #Only open the row whose first cell matches the expected company name
    if td.text != company:
        continue

    td.click()

    time.sleep(1)

    jobs[company] = get_job_desc(driver) #The get_job_desc function checks HTML tags and extract info from certain elements

    time.sleep(1)

    #Return to the list page before handling the next company
    driver.back()

    time.sleep(1)

print(jobs)

By the way, I have tried adding a user agent and scrolling down the page using the following code, in the hope that the web page would believe that I'm "looking at it." Unfortunately, that did not work either :(

# Scroll the window to the bottom of the page (x = 0, y = full height of <body>)
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

Solution

  • I think the answer to your question comes down to window_handles. Whenever a new tab is opened, the window focus moves to it. Because the focus is then on another page, we need to call the driver.switch_to.window(handle_here) method to switch back to the proper tab. To demonstrate this, I found a website with similar functionality (also in Japanese) that might help you out.

    MAIN PROGRAM - For Reference

    from selenium import webdriver
    from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait as DriverWait
    from selenium.webdriver.support import expected_conditions as DriverConditions
    from selenium.common.exceptions import WebDriverException
    import time
    
    
    def get_chrome_driver():
        """Create and return a Chrome WebDriver with a custom window size.

        Returns:
            The ``webdriver.Chrome`` instance started from the chromedriver
            executable at the hard-coded Windows path below.
        """
        # Raw string: the backslashes in this Windows path would otherwise be
        # read as (invalid) escape sequences, which newer Python versions warn about.
        path_to_chrome = r"F:\Selenium_Drivers\Windows_Chrome85_Driver\chromedriver.exe"
        chrome_options = webdriver.ChromeOptions()

        # Browser is displayed in a custom window size
        chrome_options.add_argument("window-size=1500,1000")

        return webdriver.Chrome(executable_path=path_to_chrome,
                                options=chrome_options)
    
        
    def wait_displayed(driver : ChromeDriver, xpath: str, int = 5):
        """Block until an element matching ``xpath`` is present on the page.

        Note that the condition used is presence in the DOM; it does not check
        that the element is actually visible.

        Args:
            driver: The Chrome driver to poll.
            xpath: XPath expression locating the element to wait for.
            int: Timeout passed to the wait (seconds per the Selenium docs).
                The name shadows the builtin but is kept so existing keyword
                callers keep working.

        Raises:
            WebDriverException: If the wait fails; the underlying error is
                attached as the cause.
        """
        try:
            DriverWait(driver, int).until(
                DriverConditions.presence_of_element_located(locator=(By.XPATH, xpath))
            )
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # interpreter shutdown are not converted into a WebDriverException.
        except Exception as err:
            raise WebDriverException(f'Timeout: Failed to find {xpath}') from err
    
    
    # Gets our chrome driver and opens our site
    chrome_driver = get_chrome_driver()
    try:
        chrome_driver.get("https://freelance.levtech.jp/project/search/?keyword=&srchbtn=top_search")
        wait_displayed(chrome_driver, "//div[@class='l-contentWrap']//ul[@class='asideCta']")
        wait_displayed(chrome_driver, "//div[@class='l-main']//ul[@class='prjList']")
        wait_displayed(chrome_driver, "//div[@class='l-main']//ul[@class='prjList']//li[contains(@class, 'prjList__item')][1]")

        # Click on the first item title link
        titleLinkXpath = "(//div[@class='l-main']//ul[@class='prjList']//li[contains(@class, 'prjList__item')][1]//a[contains(@href, '/project/detail/')])[1]"
        chrome_driver.find_element(By.XPATH, titleLinkXpath).click()

        # Wait until the click has produced a second tab, instead of sleeping
        # for a fixed time and hoping the tab has opened by then.
        try:
            DriverWait(chrome_driver, 5).until(DriverConditions.number_of_windows_to_be(2))
        except WebDriverException as err:
            raise Exception("Failed to click on our Link's Header") from err

        # Get the currently displayed window handles
        tabs_open = chrome_driver.window_handles
        print(f'You have: {len(tabs_open)} tabs open')

        # Switch to the 2nd tab and then close it
        chrome_driver.switch_to.window(tabs_open[1])
        chrome_driver.close()

        # Check how many tabs we have open
        tabs_open = chrome_driver.window_handles
        if len(tabs_open) != 1:
            raise Exception("Failed to close our 2nd tab")
        print(f'You have: {len(tabs_open)} tabs open')

        # Switch back to our main tab
        chrome_driver.switch_to.window(tabs_open[0])
    finally:
        # Always shut the browser down, even when one of the steps above raised.
        # quit() also stops the driver service, so no separate service.stop() is needed.
        chrome_driver.quit()
    

    For scrolling, you could use this method

    def scroll_to_element(driver : ChromeDriver, xpath : str, int = 5):
        """Wait for the element matching ``xpath`` and scroll it into view.

        Args:
            driver: The Chrome driver that owns the page.
            xpath: XPath expression locating the element to scroll to.
            int: Timeout passed to the wait (seconds per the Selenium docs).
                The name shadows the builtin but is kept so existing keyword
                callers keep working.

        Raises:
            WebDriverException: If the element could not be found or the
                scroll script failed; the underlying error is attached as
                the cause.
        """
        try:
            webElement = DriverWait(driver, int).until(
                DriverConditions.presence_of_element_located(locator=(By.XPATH, xpath))
            )
            # until() returns the located element, which is handed to the
            # script as arguments[0].
            driver.execute_script("arguments[0].scrollIntoView();", webElement)
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # interpreter shutdown are not converted into a WebDriverException.
        except Exception as err:
            raise WebDriverException(f'Timeout: Failed to find element using xpath {xpath}\nResult: Could not scroll') from err