How can I switch the TPU software version on the TPU VM architecture?
When attempting to switch the software version of a TPU (TPU VM architecture, switching from tpu-vm-tf-2.6.0-pod to tpu-vm-base) using the instructions found here, I get a Connection Refused exception with the following traceback:
Traceback (most recent call last):
File "/usr/lib/python3.8/urllib/request.py", line 1354, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "/usr/lib/python3.8/http/client.py", line 1256, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/usr/lib/python3.8/http/client.py", line 1302, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/usr/lib/python3.8/http/client.py", line 1251, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/usr/lib/python3.8/http/client.py", line 1011, in _send_output
self.send(msg)
File "/usr/lib/python3.8/http/client.py", line 951, in send
self.connect()
File "/usr/lib/python3.8/http/client.py", line 922, in connect
self.sock = self._create_connection(
File "/usr/lib/python3.8/socket.py", line 808, in create_connection
raise err
File "/usr/lib/python3.8/socket.py", line 796, in create_connection
sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "switch.py", line 20, in <module>
c.configure_tpu_version(args.target_version, restart_type="ifNeeded")
File "/usr/local/lib/python3.8/dist-packages/cloud_tpu_client/client.py", line 391, in configure_tpu_version
for result in results:
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 619, in result_iterator
yield fs.pop().result()
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 444, in result
return self.__get_result()
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
raise self._exception
File "/usr/lib/python3.8/concurrent/futures/thread.py", line 57, in run
result = self.fn(*self.args, **self.kwargs)
File "/usr/local/lib/python3.8/dist-packages/cloud_tpu_client/client.py", line 375, in configure_worker
request.urlopen(req)
File "/usr/lib/python3.8/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.8/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/usr/lib/python3.8/urllib/request.py", line 542, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "/usr/lib/python3.8/urllib/request.py", line 502, in _call_chain
result = func(*args)
File "/usr/lib/python3.8/urllib/request.py", line 1383, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/usr/lib/python3.8/urllib/request.py", line 1357, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 111] Connection refused>
The command is run on a Cloud TPU VM with cloud-tpu-client version 0.10. When running the same command from my PC, I get Connection timed out after a long pause, with the following traceback:
Traceback (most recent call last):
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/urllib/request.py", line 1350, in do_open
encode_chunked=req.has_header('Transfer-encoding'))
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/http/client.py", line 1281, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/http/client.py", line 1327, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/http/client.py", line 1276, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/http/client.py", line 1036, in _send_output
self.send(msg)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/http/client.py", line 976, in send
self.connect()
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/http/client.py", line 948, in connect
(self.host,self.port), self.timeout, self.source_address)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/socket.py", line 728, in create_connection
raise err
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/socket.py", line 716, in create_connection
sock.connect(sa)
TimeoutError: [Errno 110] Connection timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "tpu_version.py", line 19, in <module>
c.configure_tpu_version(args.target_version, restart_type="ifNeeded")
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/site-packages/cloud_tpu_client/client.py", line 392, in configure_tpu_version
for result in results:
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/concurrent/futures/_base.py", line 598, in result_iterator
yield fs.pop().result()
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/concurrent/futures/_base.py", line 435, in result
return self.__get_result()
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/concurrent/futures/_base.py", line 384, in __get_result
raise self._exception
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/concurrent/futures/thread.py", line 57, in run
result = self.fn(*self.args, **self.kwargs)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/site-packages/cloud_tpu_client/client.py", line 376, in configure_worker
request.urlopen(req)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/urllib/request.py", line 543, in _open
'_open', req)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/urllib/request.py", line 503, in _call_chain
result = func(*args)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/urllib/request.py", line 1378, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/urllib/request.py", line 1352, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 110] Connection timed out>
This feature (switching the software version in place with cloud-tpu-client's configure_tpu_version) is not supported by the TPU VM architecture.
The best way to change the TPU VM's version is to delete it and create a new one with the desired version.