After a lot of investigating, I found out that after serving hundreds of thousands of HTTP POST requests, there's a memory leak. The strange part is that the memory leak only occurs when using PyPy.
Here's an example code:
from twisted.internet import reactor
import tornado.ioloop
do_tornado = False
port = 8888
if do_tornado:
from tornado.web import RequestHandler, Application
else:
from cyclone.web import RequestHandler, Application
class MainHandler(RequestHandler):
def get(self):
self.write("Hello, world")
def post(self):
self.write("Hello, world")
if __name__ == "__main__":
routes = [(r"/", MainHandler)]
application = Application(routes)
print port
if do_tornado:
application.listen(port)
tornado.ioloop.IOLoop.instance().start()
else:
reactor.listenTCP(port, application)
reactor.run()
Here is the test code I am using to generate requests:
from twisted.internet import reactor, defer
from twisted.internet.task import LoopingCall
from twisted.web.client import Agent, HTTPConnectionPool
from twisted.web.iweb import IBodyProducer
from zope.interface import implements
pool = HTTPConnectionPool(reactor, persistent=True)
pool.retryAutomatically = False
pool.maxPersistentPerHost = 10
agent = Agent(reactor, pool=pool)
bid_url = 'http://localhost:8888'
class StringProducer(object):
implements(IBodyProducer)
def __init__(self, body):
self.body = body
self.length = len(body)
def startProducing(self, consumer):
consumer.write(self.body)
return defer.succeed(None)
def pauseProducing(self):
pass
def stopProducing(self):
pass
def callback(a):
pass
def error_callback(error):
pass
def loop():
d = agent.request('POST', bid_url, None, StringProducer("Hello, world"))
#d = agent.request('GET', bid_url)
d.addCallback(callback).addErrback(error_callback)
def main():
exchange = LoopingCall(loop)
exchange.start(0.02)
#log.startLogging(sys.stdout)
reactor.run()
main()
Note that this code does not leak with CPython nor with Tornado and Pypy! The code leaks only when using Twisted and Pypy together, and ONLY when using a POST request.
To see the leak, you have to send hundreds of thousands of requests.
Note that when setting PYPY_GC_MAX, the process eventually crashes.
What's going on?
Turns out that the cause of the leak is the BytesIO
module.
Here's how to simulate the leak on Pypy.
from io import BytesIO
while True: a = BytesIO()
Here's the fix: https://bitbucket.org/pypy/pypy/commits/40fa4f3a0740e3aac77862fe8a853259c07cb00b