I'm trying to access *.onion sites using Python. No success yet, though.
I've read a lot of Stack Overflow questions and answers and tried many different ways of solving this problem: I tried Python 2.7 and Python 3.5, tried urllib, urllib2, and requests (then I found out requests doesn't work with SOCKS), PySocks, etc., but nothing seems to work.
Right now I'm at the point where I only get the following error:
> <urlopen error [Errno 11001] getaddrinfo failed>
No, I don't have a firewall, and yes, I have a good internet connection, and yes, the site does exist. I think the problem is that it's an *.onion link.
This is what I'm doing right now:
import socks
import socket
import urllib.request

# Point PySocks at the Tor SOCKS5 proxy and monkeypatch the socket module
# so that (in theory) all connections go through Tor
socks.set_default_proxy(socks.SOCKS5, "127.0.0.1", 9050)
socket.socket = socks.socksocket

r = urllib.request.urlopen("http://xmh57jrzrnw6insl.onion")
r.read()
and this is what I'm getting:
---------------------------------------------------------------------------
gaierror Traceback (most recent call last)
C:\Users\yella\Anaconda3\lib\urllib\request.py in do_open(self, http_class, req, **http_conn_args)
1239 try:
-> 1240 h.request(req.get_method(), req.selector, req.data, headers)
1241 except OSError as err: # timeout error
C:\Users\yella\Anaconda3\lib\http\client.py in request(self, method, url, body, headers)
1082 """Send a complete request to the server."""
-> 1083 self._send_request(method, url, body, headers)
1084
C:\Users\yella\Anaconda3\lib\http\client.py in _send_request(self, method, url, body, headers)
1127 body = body.encode('iso-8859-1')
-> 1128 self.endheaders(body)
1129
C:\Users\yella\Anaconda3\lib\http\client.py in endheaders(self, message_body)
1078 raise CannotSendHeader()
-> 1079 self._send_output(message_body)
1080
C:\Users\yella\Anaconda3\lib\http\client.py in _send_output(self, message_body)
910
--> 911 self.send(msg)
912 if message_body is not None:
C:\Users\yella\Anaconda3\lib\http\client.py in send(self, data)
853 if self.auto_open:
--> 854 self.connect()
855 else:
C:\Users\yella\Anaconda3\lib\http\client.py in connect(self)
825 self.sock = self._create_connection(
--> 826 (self.host,self.port), self.timeout, self.source_address)
827 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
C:\Users\yella\Anaconda3\lib\socket.py in create_connection(address, timeout, source_address)
692 err = None
--> 693 for res in getaddrinfo(host, port, 0, SOCK_STREAM):
694 af, socktype, proto, canonname, sa = res
C:\Users\yella\Anaconda3\lib\socket.py in getaddrinfo(host, port, family, type, proto, flags)
731 addrlist = []
--> 732 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
733 af, socktype, proto, canonname, sa = res
gaierror: [Errno 11001] getaddrinfo failed
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
<ipython-input-72-1e30353c3485> in <module>()
----> 1 r = urllib.request.urlopen("http://xmh57jrzrnw6insl.onion:80")
2 r.read()
C:\Users\yella\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
160 else:
161 opener = _opener
--> 162 return opener.open(url, data, timeout)
163
164 def install_opener(opener):
C:\Users\yella\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
463 req = meth(req)
464
--> 465 response = self._open(req, data)
466
467 # post-process response
C:\Users\yella\Anaconda3\lib\urllib\request.py in _open(self, req, data)
481 protocol = req.type
482 result = self._call_chain(self.handle_open, protocol, protocol +
--> 483 '_open', req)
484 if result:
485 return result
C:\Users\yella\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
441 for handler in handlers:
442 func = getattr(handler, meth_name)
--> 443 result = func(*args)
444 if result is not None:
445 return result
C:\Users\yella\Anaconda3\lib\urllib\request.py in http_open(self, req)
1266
1267 def http_open(self, req):
-> 1268 return self.do_open(http.client.HTTPConnection, req)
1269
1270 http_request = AbstractHTTPHandler.do_request_
C:\Users\yella\Anaconda3\lib\urllib\request.py in do_open(self, http_class, req, **http_conn_args)
1240 h.request(req.get_method(), req.selector, req.data, headers)
1241 except OSError as err: # timeout error
-> 1242 raise URLError(err)
1243 r = h.getresponse()
1244 except:
URLError: <urlopen error [Errno 11001] getaddrinfo failed>
I'm very new to all of this, so I might be missing something really simple, but I'd be grateful for any help.
PS: when trying to access a regular (non-onion) site, I get the following instead:
> [WinError 10061] No connection could be made because the target machine actively refused it
I'm on Linux, but the code you supplied didn't work for me. From the looks of it, the DNS resolution is not happening over Tor (error 11001 is WSAHOST_NOT_FOUND). The 10061 (connection refused) error also makes me suspect that Tor isn't actually being used.
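One quick way to tell whether Tor is even running is to try a raw TCP connection to the SOCKS port; a minimal sketch, assuming the default port 9050:

import socket

# 10061 ("connection refused") usually means nothing is listening on the
# SOCKS port at all, i.e. the Tor service isn't running
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(3)
try:
    s.connect(("127.0.0.1", 9050))
    print("something is listening on 9050 (probably Tor)")
except socket.error as exc:
    print("could not connect to 9050: %s" % exc)
finally:
    s.close()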
In any case, I was able to get it working with this:
# Python 2: build an opener that tunnels through Tor's SOCKS5 proxy.
# The final True means remote DNS, so the .onion name is resolved by Tor.
import urllib2
import socks
from sockshandler import SocksiPyHandler

opener = urllib2.build_opener(SocksiPyHandler(socks.SOCKS5, "127.0.0.1", 9050, True))
print opener.open("http://xmh57jrzrnw6insl.onion").read()
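If you're on Python 3 (your traceback shows Anaconda3), the same approach should translate roughly as follows; this is a sketch, assuming the sockshandler module that ships with PySocks also works against urllib.request:

import urllib.request
import socks
from sockshandler import SocksiPyHandler

# Only this opener goes through Tor; the final True asks the proxy to do
# the DNS resolution, which is what makes the .onion name resolvable
opener = urllib.request.build_opener(
    SocksiPyHandler(socks.SOCKS5, "127.0.0.1", 9050, True))
print(opener.open("http://xmh57jrzrnw6insl.onion").read())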
PySocks says in its docs:
> Note that monkeypatching may not work for all standard modules or for all third party modules, and generally isn't recommended. Monkeypatching is usually an anti-pattern in Python.

The monkeypatching in question is the socket.socket = socks.socksocket line in your code.
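For what it's worth, the reason your snippet fails is that replacing socket.socket isn't enough: urllib resolves the hostname itself through socket.getaddrinfo, which bypasses the proxy and fails for .onion names. Making the monkeypatch work means stubbing that out too; a sketch of the kind of hack this turns into (precisely what the note above warns about):

import socks
import socket

# Route new sockets through Tor and ask the proxy to resolve names (rdns)
socks.set_default_proxy(socks.SOCKS5, "127.0.0.1", 9050, rdns=True)
socket.socket = socks.socksocket

# urllib still calls socket.getaddrinfo on the hostname before connecting,
# so the patched socket never sees the .onion name unless local DNS
# is stubbed out as well
def fake_getaddrinfo(host, port, *args, **kwargs):
    return [(socket.AF_INET, socket.SOCK_STREAM, 6, '', (host, port))]
socket.getaddrinfo = fake_getaddrinfo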
If possible, use Requests with the socks5h:// protocol for your proxies:
import requests

# socks5h: let the proxy (Tor) resolve the hostname
proxies = {
    'http': 'socks5h://127.0.0.1:9050',
    'https': 'socks5h://127.0.0.1:9050'
}

data = requests.get("http://xmh57jrzrnw6insl.onion", proxies=proxies).text
print(data)
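The trailing h in socks5h is the important part: with plain socks5:// Requests resolves the hostname locally, reproducing your getaddrinfo failure, while socks5h:// hands the hostname to the proxy so Tor can resolve the .onion address. Note that SOCKS support in Requests needs an extra dependency (pip install requests[socks]). If you're making several requests, the proxies can be set once on a session; a small sketch using the same endpoints:

import requests

# Every request made through this session is routed over Tor
session = requests.session()
session.proxies = {
    'http': 'socks5h://127.0.0.1:9050',
    'https': 'socks5h://127.0.0.1:9050'
}
print(session.get("http://xmh57jrzrnw6insl.onion").text)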