Search code examples
Tags: python, utf-8, byte, decoding, bitcoin

UnicodeDecodeError: 'utf-8' codec can't decode byte (python)


I'm playing around, trying to pull a bitcoin block template using the getblocktemplate method. I got a response but when I try to decode it from bytes I'm getting an error. Check it out:

# Open a TCP (IPv4, stream) socket to a single remote node.
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

HOST = "184.70.15.166" # choose a node from blockchain.info
# NOTE(review): 8333 looks like the Bitcoin peer-to-peer port rather than a
# JSON-RPC endpoint -- confirm which protocol this node actually speaks here.
PORT = 8333

s.connect((HOST, PORT))
# `msg` is not defined in this excerpt; presumably it is built earlier in the
# script (TODO confirm what it contains).
s.send(msg)
# The first reply (up to 1024 bytes) is read and discarded.
s.recv(1024)

# Random request id in the range 0..319, printed for reference.
# NOTE(review): the request below sends the literal string 'x' as its id,
# not this value.
x = random.randrange(320)
print(x)

# Serialise the request as JSON and encode it to UTF-8 bytes before sending.
data = json.dumps({'version': '2.0', 'id': 'x', 'method': 'getblocktemplate'}).encode('utf-8').strip()
s.send(data)
# Read up to 2048 bytes and decode them as UTF-8 text.
# NOTE(review): the reply shown in the post starts with the bytes
# f9 be b4 d9 followed by "verack", i.e. binary data rather than UTF-8 text;
# decoding it this way is what the reported UnicodeDecodeError complains about.
resp = s.recv(2048).decode('utf8')
print (resp)


# prints id number -
315

# prints response (in bytes) -
b'\xf9\xbe\xb4\xd9verack\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00]\xf6\xe0\xe2\xf9\xbe\xb4\xd9alert\x00\x00\x00\x00\x00\x00\x00\xa8\x00\x00\x00\x1b\xf9\xaa\xea`\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\x7f\x00\x00\x00\x00\xff\xff\xff\x7f\xfe\xff\xff\x7f\x01\xff\xff\xff\x7f\x00\x00\x00\x00\xff\xff\xff\x7f\x00\xff\xff\xff\x7f\x00/URGENT: Alert key compromised, upgrade required\x00F0D\x02 e?\xeb\xd6A\x0fG\x0fk\xae\x11\xca\xd1\x9cHA;\xec\xb1\xac,\x17\xf9\x08\xfd\x0f\xd5;\xdc:\xbdR\x02 m\x0e\x9c\x96\xfe\x88\xd4\xa0\xf0\x1e\xd9\xde\xda\xe2\xb6\xf9\xe0\r\xa9L\xad\x0f\xec\xaa\xe6n\xcfh\x9b\xf7\x1bP\xf9\xbe\xb4\xd9ping\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00-8\xc9\x03\xd7\xbc\x0f\xc9\xc2\x1d36'

# try to decode from bytes and get this error -
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python3/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 699, in runfile
    execfile(filename, namespace)
  File "/usr/lib/python3/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 88, in execfile
    exec(compile(open(filename, 'rb').read(), filename, 'exec'), namespace)
  File "/home/myfolder/Desktop/bitcoin/bitcoin 2017/connection.py", line 66, in <module>
    print (resp.decode('utf8'))
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xf9 in position 0: invalid start byte

What do you think? I've been looking all over the web and can't find a fix that works.


Solution

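  • The response is raw binary data rather than UTF-8 text, so it cannot be decoded directly. An immediate solution is to use binascii to render the bytes as printable ASCII, like this: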

    For example, your data is:

    a = b'\xf9\xbe\xb4\xd9verack\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00]\xf6\xe0\xe2\xf9\xbe\xb4\xd9alert\x00\x00\x00\x00\x00\x00\x00\xa8\x00\x00\x00\x1b\xf9\xaa\xea`\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\x7f\x00\x00\x00\x00\xff\xff\xff\x7f\xfe\xff\xff\x7f\x01\xff\xff\xff\x7f\x00\x00\x00\x00\xff\xff\xff\x7f\x00\xff\xff\xff\x7f\x00/URGENT: Alert key compromised, upgrade required\x00F0D\x02 e?\xeb\xd6A\x0fG\x0fk\xae\x11\xca\xd1\x9cHA;\xec\xb1\xac,\x17\xf9\x08\xfd\x0f\xd5;\xdc:\xbdR\x02 m\x0e\x9c\x96\xfe\x88\xd4\xa0\xf0\x1e\xd9\xde\xda\xe2\xb6\xf9\xe0\r\xa9L\xad\x0f\xec\xaa\xe6n\xcfh\x9b\xf7\x1bP\xf9\xbe\xb4\xd9ping\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00-8\xc9\x03\xd7\xbc\x0f\xc9\xc2\x1d36'
    

    You can do it like this:

    import binascii

    # binascii.b2a_uu accepts at most 45 bytes per call, so the payload
    # is first cut into consecutive slices of no more than 45 bytes each.
    b = []
    for start in range(0, len(a), 45):
        b.append(a[start:start + 45])

    # Each slice is uuencoded to one ASCII line and the lines are glued
    # together. Passing the codec explicitly, as in
    # binascii.b2a_uu(chunk).decode('UTF8'), gives the same text.
    encoded_lines = [binascii.b2a_uu(chunk).decode() for chunk in b]
    final = "".join(encoded_lines)

    print(final)
    

    Output:

    M^;ZTV79E<F%C:P            !=]N#B^;ZTV6%L97)T         *@    ;
    M^:KJ8 $              /___W\     ____?_[__W\!____?P    #___]_
    M /___W\ +U521T5.5#H@06QE<G0@:V5Y(&-O;7!R;VUI<V5D+"!U<&=R861E
    M(')E<75I<F5D $8P1 (@93_KUD$/1P]KKA'*T9Q(03OLL:PL%_D(_0_5.]PZ
    MO5("(&T.G);^B-2@\![9WMKBMOG@#:E,K0_LJN9NSVB;]QM0^;ZTV7!I;F< 
    7          @    M.,D#U[P/R<(=,S8