This program creates 500 client processes and one Tornado HTTP server. Each client performs one POST to the server. Sometimes I get requests.exceptions.ConnectionError exceptions. I added retry code to compensate. Staggering when the clients start also helps.
I think the requests and tornado modules should be able to handle this situation without extra coding, but I'm not sure which one is responsible for the dropped connections.
#! /usr/bin/env python3
import time
import multiprocessing
import random
import requests
import tornado.ioloop
import tornado.web
class Client(multiprocessing.Process):
    """Client process that performs a single POST to the log server.

    Constructing a Client is enough to launch it: the child process is
    started from __init__ and runs :meth:`run` immediately.
    """

    # Give up after this many consecutive ConnectionErrors.
    MAX_ATTEMPTS = 10

    def __init__(self, client_id):
        multiprocessing.Process.__init__(self)
        self.client_id = client_id
        self.host = 'http://localhost:8888/log'
        self.sample_record = 'x' * 300
        # NOTE: starting from __init__ means callers cannot configure the
        # instance any further before the child process begins running.
        self.start()

    def run(self):
        # Wait until top of current 5 sec interval so all clients start together.
        time.sleep(5 - time.time() % 5)
        #time.sleep(random.random())  # This seems to eliminate the problem.
        # requests.Session will do keep-alive by default.
        session = requests.Session()
        # Reuse the prepared record instead of duplicating the literal.
        payload = {'record': self.sample_record}
        for _attempt in range(self.MAX_ATTEMPTS):
            try:
                response = session.post(self.host, data=payload, timeout=10)
                break
            except requests.exceptions.ConnectionError:
                print('Retry: id=', self.client_id)
        else:
            # Was `assert attempt_count < 10`: asserts vanish under -O, so
            # raise explicitly when every attempt failed.
            raise RuntimeError('Too many retries: id=%s' % self.client_id)
        # Explicit check (was an assert) so a bad response is reported even
        # when Python runs with optimizations enabled.
        if response.status_code != 200 or response.text != 'Success':
            raise RuntimeError('Unexpected response: %s %r'
                               % (response.status_code, response.text))
post_received_count = 0
class LogHandler(tornado.web.RequestHandler):
    """Accept a POSTed log record and acknowledge it with 'Success'."""

    def post(self):
        global post_received_count
        post_received_count = post_received_count + 1
        # Report progress once per hundred records received.
        if not post_received_count % 100:
            print('post count=', post_received_count)
        self.write('Success')
def make_app():
    """Build the Tornado application exposing the single /log endpoint."""
    routes = [
        (r"/log", LogHandler),  # http://localhost:8888/log
    ]
    return tornado.web.Application(
        routes,
        cookie_secret = "__TODO:_GENERATE_YOUR_OWN_RANDOM_VALUE_HERE__",
    )
if __name__ == "__main__":
    # Start one second into the next 5 sec interval, so the server is
    # listening before the clients wake at the top of the interval.
    time.sleep(6 - time.time() % 5)
    print('Start clients...')
    client_list = [Client(client_id) for client_id in range(500)]
    print('Done.')
    server_app = make_app()
    server_app.listen(8888)
    io_loop = tornado.ioloop.IOLoop.current()
    try:
        io_loop.start()
    except KeyboardInterrupt:
        # Allow a clean Ctrl-C shutdown without a traceback.
        pass
Output from two runs on Linux. The program prints the number of POSTs performed and stops after 500 POSTs; I then use Ctrl-\ to kill the server. The first run completed without problems, but the second run produced multiple ConnectionError exceptions that required retries.
It consistently runs without exceptions if I un-comment the time.sleep(random.random())
statement. That's a random sleep between 0-1 second. Apparently the problem is only when too many clients submit a request at the same time.
$ ulimit -n 10000
$ test_log_server.py
Start clients...
Done.
post count= 100
post count= 200
post count= 300
post count= 400
post count= 500
^\Quit (core dumped)
$ test_log_server.py
Start clients...
Done.
post count= 100
Retry: id= 223
Retry: id= 340
Retry: id= 116
Retry: id= 164
Retry: id= 258
Retry: id= 150
Retry: id= 290
Retry: id= 16
Retry: id= 40
Retry: id= 5
post count= 200
post count= 300
post count= 400
post count= 500
^\Quit (core dumped)
$
I get the following exceptions if I remove the try block. This is from one of the 500 clients. Other clients reported the same exceptions.
Process Client-102:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 601, in urlopen
chunked=chunked)
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 387, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 383, in _make_request
httplib_response = conn.getresponse()
File "/usr/lib/python3.6/http/client.py", line 1373, in getresponse
response.begin()
File "/usr/lib/python3.6/http/client.py", line 311, in begin
version, status, reason = self._read_status()
File "/usr/lib/python3.6/http/client.py", line 272, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/usr/lib/python3.6/socket.py", line 586, in readinto
return self._sock.recv_into(b)
ConnectionResetError: [Errno 104] Connection reset by peer
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/requests/adapters.py", line 440, in send
timeout=timeout
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 639, in urlopen
_stacktrace=sys.exc_info()[2])
File "/usr/lib/python3/dist-packages/urllib3/util/retry.py", line 367, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/usr/lib/python3/dist-packages/six.py", line 692, in reraise
raise value.with_traceback(tb)
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 601, in urlopen
chunked=chunked)
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 387, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 383, in _make_request
httplib_response = conn.getresponse()
File "/usr/lib/python3.6/http/client.py", line 1373, in getresponse
response.begin()
File "/usr/lib/python3.6/http/client.py", line 311, in begin
version, status, reason = self._read_status()
File "/usr/lib/python3.6/http/client.py", line 272, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/usr/lib/python3.6/socket.py", line 586, in readinto
return self._sock.recv_into(b)
urllib3.exceptions.ProtocolError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "./test_log_server.py", line 31, in run
response = session.post(self.host, data=payload, timeout=10)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 567, in post
return self.request('POST', url, data=data, json=json, **kwargs)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 520, in request
resp = self.send(prep, **send_kwargs)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 630, in send
r = adapter.send(request, **kwargs)
File "/usr/lib/python3/dist-packages/requests/adapters.py", line 490, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
Apparently the problem is only when too many clients submit a request at the same time.
500 client processes is a lot. One possibility is the socket listen backlog, which is set to 128 when you use the app.listen
interface. In older kernel versions 128 was the maximum allowed here, but more recently it's become possible to increase it (depending on your kernel version and other configurations). Try replacing app.listen(8888)
with:
server = tornado.httpserver.HTTPServer(app)
server.bind(8888, backlog=4096)
server.start()
I would also suggest that if you really need to support this many connections being initiated at once, you'll probably want to run more than one server process instead of just increasing the socket backlog of a single process.