Search code examples
python · web · urllib2 · urlopen

How to reliably process web-data in Python


I'm using the following code to get data from a website:

time_out = 4

def tryconnect(turl, timer=time_out, retries=10):
    """Open *turl* with urllib2, retrying on URLError.

    Args:
        turl: the URL to open.
        timer: per-attempt timeout in seconds (defaults to module-level
            time_out).
        retries: maximum number of failed attempts before giving up.

    Returns:
        The response object from urllib2.urlopen on success, or None if
        every attempt raised urllib2.URLError.

    NOTE(review): Python 2 code (urllib2) — confirm target runtime.
    """
    urlopener = None
    sitefound = 1  # 1 = not connected yet; flips to 0 on success
    tried = 0
    while (sitefound != 0) and tried < retries:
        try:
            # data=None -> plain GET; timer is the socket timeout
            urlopener = urllib2.urlopen(turl, None, timer)
            sitefound = 0
        except urllib2.URLError:
            tried += 1
    if urlopener: return urlopener
    else: return None

[...]

urlopener = tryconnect('www.example.com')
if not urlopener:
    return None
try:
    for line in urlopener:
        do stuff
except httplib.IncompleteRead:
    print 'incomplete'
    return None
except socket.timeout:
    print 'socket'
    return None
return stuff

Is there a way I can handle all these exceptions without having so much boilerplate code every time?

Thanks!


Solution

  • You can avoid some boilerplate code in the first function too:

    time_out = 4
    
    def tryconnect(turl, timer=time_out, retries=10):
        """Try to open *turl*, retrying on urllib2.URLError.

        Makes up to *retries* attempts with a per-attempt timeout of
        *timer* seconds; returns the response object from the first
        successful attempt, or None once all attempts have failed.
        """
        attempts_left = retries
        while attempts_left > 0:
            attempts_left -= 1
            try:
                return urllib2.urlopen(turl, None, timer)
            except urllib2.URLError:
                # Swallow the error and fall through to the next attempt.
                continue
        return None
    

    and in the second:

    urlopener = tryconnect('www.example.com')
    if urlopener:
        try:
            for line in urlopener:
                do stuff
        except (httplib.IncompleteRead, socket.timeout), e:
            print e
            return None
    else:
        return None