Search code examples
pythonmultithreadingtwisted

Requests time out with async. How should I fix this error?


I have a script that works fine on my old Windows XP computer, but I need to get it working on Windows 7. I've installed all the required modules and their dependencies, but I still get some strange errors. After five hours of searching the internet, I hope some good soul here can give me a hand.

So here is the problematic function of my script:

from gevent import monkey
import gevent
import requests
from requests import async
import xlwt
from bs4 import BeautifulSoup as soup
from urllib2 import urlopen
import time
from twisted.web import client
from twisted.internet import reactor, defer
import re
import os
import urllib
import random
import sys
import re
import logging
from threading import RLock

# Configure the root logger to emit records at INFO level and above;
# `log` is the root logger itself (getLogger() called without a name).
logging.basicConfig(level=logging.INFO)
log = logging.getLogger()

from grab.spider import Spider, Task



def BULATS_TC():
    ''' FETCHING BULATS AGENTS '''
    x = 0
    START_BULATS_TC = time.time()
    ws = wb.add_sheet("BULATS_AGENTS")
    Page_List = ['0','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19', '20']
    to_send = []

    def parse(response):
        global x
        html = soup(response.text)
        tableau = html.find('table')
        try:
            rows = tableau.findAll('tr')
            for tr in rows:
                cols = tr.findAll('td')
                y = 0
                x = x + 1
                for td in cols:
                    texte_bu = td.text
                    texte_bu = texte_bu.encode('utf-8')
                    texte_bu = texte_bu.strip()
                    ws.write(x,y,texte_bu)
                    y = y + 1
        except (IndexError, AttributeError):
            pass

    for Page in Page_List:
        r = requests.async.get('http://www.bulats.org/agents/find-an-agent?field_continent_tid=All&field_country_tid=All&page=%s' % Page, timeout=20)
        to_send.append(r)

    responses = requests.async.map(to_send)
    parse_jobs = [gevent.spawn(parse, resp) for resp in responses]
    gevent.joinall(parse_jobs)

    ELAPSED_TIME_BULATS_AGENTS = time.time() - START_BULATS_TC
    return ELAPSED_TIME_BULATS_AGENTS

And here is the error I get in my terminal:

exceptions.IOError: [Errno 0] No error
Restarting gevent.core.dispatch() after an error [1928]: [Errno 0] No error
Unhandled Error
Traceback (most recent call last):
  File "C:\Users\Admin\Desktop\EAGLE_DATA_UPDATE.py", line 509, in <module>
    BULATS_IA_TIME = BULATS_IA()
  File "C:\Users\Admin\Desktop\EAGLE_DATA_UPDATE.py", line 430, in BULATS_IA
    reactor.run()
  File "C:\Python27\lib\site-packages\twisted\internet\base.py", line 1169, in r
un
    self.mainLoop()
--- <exception caught here> ---
  File "C:\Python27\lib\site-packages\twisted\internet\base.py", line 1181, in m
ainLoop
    self.doIteration(t)
  File "C:\Python27\lib\site-packages\twisted\internet\selectreactor.py", line 1
04, in doSelect
    [], timeout)
  File "C:\Python27\lib\site-packages\twisted\internet\selectreactor.py", line 3
6, in win32select
    r, w, e = select.select(r, w, w, timeout)
  File "C:\Python27\lib\site-packages\gevent\select.py", line 63, in select
    result.event.wait(timeout=timeout)
  File "C:\Python27\lib\site-packages\gevent\event.py", line 74, in wait
    result = get_hub().switch()
  File "C:\Python27\lib\site-packages\gevent\hub.py", line 164, in switch
    return greenlet.switch(self)
  File "C:\Python27\lib\site-packages\gevent\hub.py", line 179, in run
    result = core.dispatch()
  File "core.pyx", line 398, in gevent.core.dispatch (gevent/core.c:5404)

exceptions.IOError: [Errno 0] No error
Restarting gevent.core.dispatch() after an error [1929]: [Errno 0] No error
Unhandled error in Deferred:
Unhandled Error
Traceback (most recent call last):
Failure: twisted.internet.defer.FirstError: FirstError[#2, [Failure instance: Tr
aceback (failure with no frames): <class 'twisted.internet.error.TimeoutError'>:
 User timeout caused connection failure.
]]

I think that my connection is not very stable (I'm in China), so maybe I need to set a longer timeout, but I don't know how to do that with requests in my script.

I hope that it's all clear enough.


Solution

  • I finally got it working by reinstalling Twisted.