Search code examples
python bittorrent dht

How to interpret 'nodes' in a DHT response?


I'm reading through BEP-0005 and I don't really understand how the node IDs translate to (IP, port) pairs. Consider the following code:

import bencode
import random
import socket
import pprint

# Each DHT participant identifies itself with a random 160-bit (20-byte) ID.
def rand():
    """Return a random 20-character string, one character per byte value 0-255."""
    return ''.join(chr(random.randint(0, 255)) for _ in range(20))


my_id = rand()

# Build the "get_peers" query: our own node ID plus the 20-byte info hash of
# the torrent we are asking about.
query_args = {"id": my_id,
              "info_hash": '\xd9\x9d\x14\x8c\xf7\xb5\xee'
                           '\x84</\x806\xd6d\x13\xb3\xe3\x0f\x1b\xe7'}
get_peers = {"t": '0f', "y": "q", "q": "get_peers", "a": query_args}
get_peers_bencoded = bencode.bencode(get_peers)

# Send the query to the bootstrap router over UDP and wait for one datagram.
bootstrap_address = (socket.gethostbyname('router.bittorrent.com'), 6881)
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.sendto(get_peers_bencoded, bootstrap_address)
payload, _sender = s.recvfrom(1024)
response = bencode.bdecode(payload)

# 'nodes' arrives as one packed string; cut it into 26-byte entries so that
# the pretty-printed response shows one node per list item.
reply = response['r']
if 'nodes' in reply:
    packed = reply['nodes']
    reply['nodes'] = [packed[start:start + 26]
                      for start in range(0, len(packed), 26)]
pprint.pprint(response)

And the response:

{'ip': '\xb9\x05\xdb\xf9\x9eV',
 'r': {'id': '2\xf5NisQ\xffJ\xec)\xcd\xba\xab\xf2\xfb\xe3F|\xc2g',
       'nodes': ['j\x11r\x8a\x95\\QC\xd7~B3\xb7]\x8f\xa3\xe0\x04\x8f\xa7\xbc\x10;\x9e\x83=',
                 'r\x99?\xef)\rY{3 \xcba\x0e* N\xc0\xe7z\xa3\xdbM\xc1t>\x81',
                 '\xaa\xd0L\x14\x07\t\xf3y\xb0\x1fH\xd1-z\xf4\x1bm\x91Vj\xcd\xd0r6\x9f\xff',
                 '\x93f\xd9n\xd6W\x12\xdb\x85\xa4b\x88\xe8\x81\xfapY\xef\xe1\xe9\x9a\x05me>\x81',
                 '\xa6\xc6\n/C\xa3\x9b\x99`v\xe1\xc9\x06%3\xf5_\xbe\xba\xc7\xc2\xe1\x1c\x16rL',
                 'FM\xbeAiKx\x853(q$n\xa2\xebE6<\x07\x1aY\xbaNh\xd9a',
                 '\xbfO\x1c\xc1\xbemt\x1f.\x11\xab]\xde\xcfE@*\x99\xd8NE\x8d\t\x0f\xc4\x91',
                 '=1\x9b\xbf2\x17\x1c\xa1A\x05=\xb6\xdb\xf1\x91\xb7 \x86\xe10;a`n\xd7t',
                 '\xa8\xea1\xc8*>\xbd\xf4\x92\xbbW\xc9\xdc\xd0\xca\x01\xca\xc1\xc6\xd9\x05+\x8f\x9cN\x07',
                 '\x95d\xc4\x85\x9e\xb9V\xbe\xdbd>\xf5T\x192\x07\x95n\x84\xdc\xb2\x98x\xf6\x1a\xe1',
                 "\xb8Pt\xbf\xa2'\xb3N\xd3B\x8f\xab\x91x3\x0bO\xcf!\xcam\x92G\xe3\x9a/",
                 '\xf6+\x9a\xad\x8a\x93\xe882\xf7\x0c;\xd1%ne\xcb\x10\x03\xc9n\x17]\xd1B\xfc',
                 '\xb8d(\xf1\x05j\x89\xb3O\x1c\x04\xd0\x8e\xe0u\x87\x94\x90a\x12\xdan\xa0\xf2R4',
                 '\xd7\x07\xa0\xd6\xc5I\x8bm\xee\x9a\x9e\xd7\xe5\x97J\xb1g_Khg\xed\\\x15^\xe8',
                 '\x04\xc8+\xa4P\xf9\xe5\x98\x8aCX\x8a\x15\xc1\xc2\xe4\x7fY\xbd\xbdY\x84Y\xab=%',
                 '\x9f\xad\xe5\xbb\x11\xad\xae$\x88y\xfeR\xdb%C\xe5<\xf4E\xfa;`\xb3\xfb\xa0\xca']},
 't': '0f',
 'y': 'r'}

What should I do now to get the list of IP:port pairs? I tried `socket.inet_ntop(socket.AF_INET, s[-6:][:4])` for the IP and `struct.unpack("H", s[-2:])[0]` for the port, but couldn't connect to any of the resulting nodes.


Solution

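  • It looks like the problem was related to the endianness of the port number: changing `struct.unpack("H", ...)` to `struct.unpack(">H", ...)` helped. The port is stored in network (big-endian) byte order, whereas a bare "H" format uses the machine's native byte order. Here's a working sample: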

    #!/usr/bin/env python
    
    import bencode
    import random
    import socket
    import sys
    import ast
    import struct
    
    def rand():
        """Return a random 20-character node ID, one character per byte value 0-255."""
        return ''.join(chr(random.randint(0, 255)) for _ in range(20))
    
    # Node ID this client presents in every query it sends.
    my_id = rand()
    
    def query(ip, port):
        """Send one ``get_peers`` query to a DHT node and return the nodes it suggests.
    
        The query asks for a fixed, hard-coded info hash and identifies this
        client with the module-level ``my_id``.
    
        Args:
            ip: IPv4 address of the node to ask, as a dotted-quad string.
            port: UDP port of that node.
    
        Returns:
            A list of ``(ip, port)`` tuples decoded from the reply's ``nodes``
            field. The list is empty when the node does not answer within 0.5
            seconds, when the socket reports an error, or when the reply
            carries no usable ``nodes`` data.
    
        Side effects:
            Prints progress messages. If the reply contains ``values`` (actual
            peers), the whole reply is printed and the process exits through
            ``sys.exit()``.
        """
        print("Trying %s:%d" % (ip, port))
        get_peers = {"t":'0f', "y":"q", "q":"get_peers",
            "a": {"id":my_id,
                  "info_hash": '\xd9\x9d\x14\x8c\xf7\xb5\xee'
                               '\x84</\x806\xd6d\x13\xb3\xe3\x0f\x1b\xe7'}
        }
        get_peers_bencoded = bencode.bencode(get_peers)
    
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            sock.settimeout(0.5)
            sock.sendto(get_peers_bencoded, (ip, port))
            # 65535 covers any UDP datagram, so a large reply is never truncated.
            data = sock.recvfrom(65535)[0]
        except socket.error:
            # socket.timeout is a subclass of socket.error, so this covers both a
            # silent node and a send/receive failure (for example an advertised
            # address or port that the OS refuses to send to).
            return []
        finally:
            # Always release the descriptor; the crawl opens one socket per node.
            sock.close()
        response = bencode.bdecode(data)
    
        # Error replies have no 'r' dictionary; treat them as "no nodes".
        reply = response.get('r')
        if not isinstance(reply, dict):
            print("Got 0 nodes.")
            return []
    
        if 'values' in reply:
            print(response)
            sys.exit()
    
        # 'nodes' is a packed string of 26-byte entries: a 20-byte node ID,
        # then a 4-byte IPv4 address and a 2-byte big-endian port.
        nodes = reply.get('nodes', '')
        ret = []
        # Stopping at len - 25 skips a trailing partial entry instead of failing.
        for start in range(0, len(nodes) - 25, 26):
            entry = nodes[start:start + 26]
            node_ip = socket.inet_ntop(socket.AF_INET, entry[20:24])
            node_port = struct.unpack(">H", entry[24:26])[0]
            ret.append((node_ip, node_port))
        print("Got %d nodes." % len(ret))
        return ret
    
    if __name__ == '__main__':
        # Crawl outwards from the bootstrap router. query() exits the process
        # itself as soon as some node answers with actual peers ('values').
        ips = [(socket.gethostbyname('router.bittorrent.com'), 6881)]
        # Remember every address ever queued so no node is asked twice.
        seen = set(ips)
        # Stop cleanly when no candidates remain, instead of popping from an
        # empty list (which happens, for instance, if the bootstrap query
        # times out).
        while ips:
            ip, port = ips.pop()
            for node in query(ip, port):
                if node not in seen:
                    seen.add(node)
                    ips.append(node)