I'm scraping Twitter to get the friends (i.e. the accounts being followed) for a list of Twitter users. I'm using tweepy with Python 3.6.5 on macOS 10.13. An abbreviated code chunk:
def get_friends_for_each_twitter_user(UserL=None, Name=None):
    """Store on each user in UserL the screen names of the accounts they follow.

    For every user the function reads ``user.protected`` and
    ``user.screenName`` and writes ``user.friendsL``: the string
    "protected" for protected accounts, otherwise a list of screen names.

    ``Name`` is not used in the visible part of the function.
    NOTE(review): ``api`` is presumably created in the elided auth section
    below -- confirm against the full source.
    """
    .
    . # Auth keys and such
    .
    for user in UserL: ### This is a list of USER class with the below fields ###
        ### Handle protected users ###
        if(user.protected == True):
            user.friendsL = "protected"
            continue
        screenNameL=[]  # never used in the visible code
        friendIDL=[]
        friendL=[]
        friendScreenNameL=[]
        ### Get IDs of people that this user follows (i.e. 'friends') ###
        for page in tweepy.Cursor(api.friends_ids, screen_name=user.screenName).pages():
            friendIDL.extend(page)
            # NOTE(review): indentation was lost in the paste; the sleep is
            # assumed to run once per page of IDs -- confirm.
            time.sleep(60)
        ## Loop through IDs, get user profile, keep only friends' screen name ###
        # The IDs are looked up in slices of 100 per call. This is the call
        # that the traceback below reports as failing; nothing here catches
        # the exception, so one failed request aborts the whole run.
        for i in range(0, len(friendIDL), 100):
            friendL.extend(api.lookup_users(user_ids=friendIDL[i:i+100]))
        ### Keep only screen name ###
        for friend in friendL:
            friendScreenNameL.append(friend._json['screen_name'])
        user.friendsL = friendScreenNameL
When I do this, after collecting the friends
(i.e. profiles that the user follows) for about a dozen users, I get the following errors:
Traceback (most recent call last):
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/urllib3/connectionpool.py", line 601, in urlopen
chunked=chunked)
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
six.raise_from(e, None)
File "<string>", line 2, in raise_from
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
httplib_response = conn.getresponse()
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1331, in getresponse
response.begin()
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 297, in begin
version, status, reason = self._read_status()
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 266, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/requests/adapters.py", line 440, in send
timeout=timeout
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/urllib3/connectionpool.py", line 639, in urlopen
_stacktrace=sys.exc_info()[2])
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/urllib3/util/retry.py", line 357, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/urllib3/packages/six.py", line 685, in reraise
raise value.with_traceback(tb)
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/urllib3/connectionpool.py", line 601, in urlopen
chunked=chunked)
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
six.raise_from(e, None)
File "<string>", line 2, in raise_from
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
httplib_response = conn.getresponse()
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1331, in getresponse
response.begin()
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 297, in begin
version, status, reason = self._read_status()
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 266, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response',))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/tweepy/binder.py", line 190, in execute
proxies=self.api.proxy)
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/requests/sessions.py", line 508, in request
resp = self.send(prep, **send_kwargs)
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/requests/sessions.py", line 618, in send
r = adapter.send(request, **kwargs)
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/requests/adapters.py", line 490, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response',))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pdb.py", line 1667, in main
pdb._runscript(mainpyfile)
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pdb.py", line 1548, in _runscript
self.run(statement)
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/bdb.py", line 434, in run
exec(cmd, globals, locals)
File "<string>", line 1, in <module>
File "/Users/myusername/Code/Python/hair_prod/src/main.py", line 170, in <module>
main()
File "/Users/myusername/Code/Python/hair_prod/src/main.py", line 141, in main
get_friends_for_each_twitter_user(UserL=tresemmeUserL, Name="Tresemme")
File "src/twitter_scraper.py", line 187, in get_friends_for_each_twitter_user
friendL.extend(api.lookup_users(user_ids=friendIDL[i:i+100]))
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/tweepy/api.py", line 336, in lookup_users
return self._lookup_users(post_data=post_data)
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/tweepy/binder.py", line 250, in _call
return method.execute()
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/tweepy/binder.py", line 192, in execute
six.reraise(TweepError, TweepError('Failed to send request: %s' % e), sys.exc_info()[2])
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/six.py", line 692, in reraise
raise value.with_traceback(tb)
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/tweepy/binder.py", line 190, in execute
proxies=self.api.proxy)
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/requests/sessions.py", line 508, in request
resp = self.send(prep, **send_kwargs)
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/requests/sessions.py", line 618, in send
r = adapter.send(request, **kwargs)
File "/Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/requests/adapters.py", line 490, in send
raise ConnectionError(err, request=request)
tweepy.error.TweepError: Failed to send request: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response',))
Uncaught exception. Entering post mortem debugging
Running 'cont' or 'step' will restart the program
> /Users/myusername/.local/virtualenvs/python3.6/lib/python3.6/site-packages/requests/adapters.py(490)send()
-> raise ConnectionError(err, request=request)
It appears that the errors occur on the line friendL.extend(api.lookup_users(user_ids=friendIDL[i:i+100])) in the get_friends_for_each_twitter_user() function.
QUESTION :
Why is this error occurring?
How do I avoid/work around it?
After experimenting with this for a while, I believe the issue was caused by my network connection. When the errors occurred I was connected to my 5GHz wireless network; after I switched to my 2.4GHz wireless network, they became less frequent. Either way, the proper thing to do in this situation is to handle the exception, wait a few seconds, and try again. Below is the relevant code fragment:
# Twitter API error codes that are handled explicitly below.
_API_CODE_NO_USER_MATCHES = 17  # users/lookup: none of the requested IDs resolve
_API_CODE_PAGE_NOT_FOUND = 34   # the account no longer exists


def _lookup_users_with_retry(api, user_ids, screen_name, max_retries=5, retry_delay=5):
    """Return the profiles for user_ids, retrying failed requests a bounded number of times."""
    attempts = max(1, max_retries)
    for attempt in range(1, attempts + 1):
        try:
            return api.lookup_users(user_ids=user_ids)
        except tweepy.TweepError as error:
            # Transport failures ("Failed to send request: ...") carry no
            # api_code; getattr keeps the handler safe either way.
            api_code = getattr(error, 'api_code', None)
            print("...Exception for {} : api_code {}".format(screen_name, api_code))
            if api_code == _API_CODE_NO_USER_MATCHES:
                # Every ID in this batch is suspended or deleted. Retrying
                # can never succeed, so skip the batch instead of looping.
                return []
            if attempt == attempts:
                # Give up rather than spin forever on a persistent failure.
                raise
            time.sleep(retry_delay)
    return []


def get_friends_for_each_twitter_user(UserL=None, Name=None, max_retries=5, retry_delay=5):
    """Store on each user in UserL the screen names of the accounts they follow.

    Args:
        UserL: Iterable of user objects exposing ``protected`` and
            ``screenName``. ``None`` or an empty list is a no-op.
        Name: Unused here; kept so existing callers keep working.
        max_retries: Maximum attempts per ``lookup_users`` batch before the
            last ``tweepy.TweepError`` is re-raised.
        retry_delay: Seconds to sleep between attempts.

    Side effects:
        Sets ``user.friendsL`` to the string "protected" for protected
        accounts, or to a list of screen names otherwise. Accounts that no
        longer exist (API code 34) are reported and left untouched.

    Raises:
        tweepy.TweepError: When fetching friend IDs fails for a reason other
            than a missing account, or a profile lookup keeps failing after
            ``max_retries`` attempts.
    """
    if not UserL:
        # Nothing to do; also avoids an auth round-trip for empty input.
        return

    consumerKey = "YOUR_CONSUMER_KEY"  # your value here
    consumerSecret = "YOUR_CONSUMER_SECRET"  # your value here
    auth = tweepy.AppAuthHandler(consumerKey, consumerSecret)  ### Supposedly faster
    ## wait_on_rate_limit makes tweepy sleep through rate-limit windows itself
    api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

    for user in UserL:
        if user.protected:
            user.friendsL = "protected"
            continue

        #### TWITTER LIMITS US #####
        friendIDL = []
        try:
            for page in tweepy.Cursor(api.friends_ids, screen_name=user.screenName).pages():
                friendIDL.extend(page)
        except tweepy.TweepError as error:
            if getattr(error, 'api_code', None) == _API_CODE_PAGE_NOT_FOUND:
                print("...{} is dead".format(user.screenName))
                continue
            raise

        # Profiles are looked up in slices of 100 IDs per request.
        friendL = []
        for i in range(0, len(friendIDL), 100):
            friendL.extend(
                _lookup_users_with_retry(
                    api,
                    friendIDL[i:i + 100],
                    user.screenName,
                    max_retries=max_retries,
                    retry_delay=retry_delay,
                )
            )

        ### Keep only screen name ###
        user.friendsL = [friend.screen_name for friend in friendL]