Search code examples
python selenium web-scraping urllib apscheduler

Failed to establish a new connection error


I'm working on a web scraping program and have run into a problem.

It raises a MaxRetryError:

raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='127.0.0.1', port=58826): Max retries exceeded with url: /session/c6ef075beb50a6e44c0bb16555679dcd/window/current/size (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x10b9849b0>: Failed to establish a new connection: [Errno 61] Connection refused'))

I searched for this error on GitHub, but the suggested fix didn't work for my program: https://github.com/timgrossmann/InstaPy/issues/3311

from apscheduler.schedulers.blocking import BlockingScheduler
import random
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from random_user_agent.user_agent import UserAgent
from random_user_agent.params import HardwareType, OperatingSystem

def randomUserAgent():
    """Return a random user agent from a rotator limited to mobile iOS/Android."""
    # Restrict the rotator's pool to mobile hardware running iOS or Android.
    rotator = UserAgent(
        operating_systems=[OperatingSystem.IOS.value, OperatingSystem.ANDROID.value],
        hardware_types=[HardwareType.MOBILE.value],
    )
    return rotator.get_random_user_agent()

def mainBody(times):
    """Load the target page `times` times using the module-level `driver`.

    Args:
        times: Number of loop iterations to run.

    NOTE(review): `driver` is not created here; it is the single instance
    built in the `__main__` section. It is quit at the end of every
    iteration, so from the second iteration on the loop calls methods on a
    driver that has already been shut down. The quoted traceback fails on a
    `/window/current/size` request, which matches the `set_window_size` call
    below -- presumably on that second pass; confirm.
    """
    for i in range(times):
        n = 0  # never read afterwards
        randomUrl = 'https://www.thermofisher.com/'  # fixed URL despite the name
        driver.set_window_size(375, 677)
        driver.get(randomUrl)
        driver.delete_all_cookies()
        driver.quit()  # ends the shared driver while later iterations still need it


if __name__=='__main__':
    # Iteration counts and run dates; entry i of one list belongs to entry i
    # of the other.
    Times = [5]
    Timer = ['2019-5-6 23:06:30']
    ua = randomUserAgent()
    opts = Options()
    # NOTE(review): the raw user-agent string is passed as the entire
    # argument; Chrome presumably expects a `--user-agent=<value>` switch --
    # confirm.
    opts.add_argument(ua)
    # A single driver is created here and shared by every mainBody() call.
    driver = webdriver.Chrome('/chromedriver',options=opts)
    # NOTE(review): Times and Timer each hold one entry, so Times[1] would
    # raise IndexError if a second iteration were ever reached.
    for i in range(10):
        scheduler = BlockingScheduler()
        times = int(Times[i])
        scheduler.add_job(mainBody, 'date', run_date=Timer[i], args=[times])
        # NOTE(review): presumably start() blocks the calling thread, in which
        # case the loop never advances past its first pass -- confirm.
        scheduler.start()

Solution

  • The problem seems to be with the driver trying to share sessions. Having each loop iteration create its own driver solves the error for me.

    from apscheduler.schedulers.blocking import BlockingScheduler
    import random
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from random_user_agent.user_agent import UserAgent
    from random_user_agent.params import HardwareType, OperatingSystem
    
    def randomUserAgent():
        """Return a random user agent from a rotator limited to mobile iOS/Android."""
        # Restrict the rotator's pool to mobile hardware running iOS or Android.
        rotator = UserAgent(
            operating_systems=[OperatingSystem.IOS.value, OperatingSystem.ANDROID.value],
            hardware_types=[HardwareType.MOBILE.value],
        )
        return rotator.get_random_user_agent()
    
    def mainBody(times):
        """Open the target page `times` times, each in a fresh Chrome session.

        A new driver is started for every visit and is always quit afterwards,
        so no iteration reuses a session that was already shut down, and no
        browser process is left behind when a page load fails.

        Reads the module-level `opts` (Chrome options) set up in `__main__`.

        Args:
            times: Number of visits to perform; zero or less does nothing.
        """
        url = 'https://www.thermofisher.com/'
        for _ in range(times):
            driver = webdriver.Chrome('/chromedriver', options=opts)
            try:
                driver.set_window_size(375, 677)
                driver.get(url)
                driver.delete_all_cookies()
            finally:
                # Quit even on error so chromedriver/Chrome processes do not leak.
                driver.quit()
    
    
    if __name__=='__main__':
        # Visit counts and the run dates they belong to, paired by position.
        Times = [5]
        Timer = ['2019-5-6 23:06:30']
        ua = randomUserAgent()
        opts = Options()
        # Chrome overrides its user agent through the `--user-agent=` switch;
        # a bare user-agent string on its own is not a recognised argument.
        opts.add_argument('--user-agent=' + ua)
        # Register every job on one scheduler before starting it:
        # BlockingScheduler.start() blocks the calling thread, so calling it
        # inside the loop would keep any later job from ever being added.
        scheduler = BlockingScheduler()
        # zip() pairs the two lists and stops at the shorter one, so the loop
        # can no longer index past the end of either list.
        for times, run_date in zip(Times, Timer):
            scheduler.add_job(mainBody, 'date', run_date=run_date, args=[int(times)])
        scheduler.start()