I'm trying to make a simple distributed job client/server system in Twisted. Basically the steps are:
But I'm having trouble debugging my protocol on a local machine.
JobServer.py
from twisted.application import internet, service
from twisted.internet import reactor, protocol, defer
from twisted.protocols import basic
from twisted.protocols.basic import Int32StringReceiver
from twisted.web import client
import random
import json
import base64
from logger import JobLogger
class JobServerProtocol(Int32StringReceiver):
log = JobLogger("server.log")
def connectionMade(self):
self.log.write("Connected to client")
self.sendJob(None)
def stringReceived(self, msg):
self.log.write("Recieved job from client: %s" % msg)
self.sendJob(msg)
def sendJob(self, msg):
d = self.factory.getJob(msg)
def onError(err):
self.transport.write("Internal server error")
d.addErrback(onError)
def sendString(newjob_dict):
encoded_str = json.dumps(newjob_dict)
self.transport.write(encoded_str)
self.log.write("Sending job to client: %s" % encoded_str)
d.addCallback(sendString)
def lengthLimitExceeded(self, msg):
self.transport.loseConnection()
class JobServerFactory(protocol.ServerFactory):
protocol = JobServerProtocol
def __init__(self, jobs, files):
assert len(jobs) == len(files)
self.jobs = jobs
self.files = files
self.results = []
def getJob(self, msg):
# on startup the client will not have a message to send
if msg:
# recreate pickled msg
msg_dict = json.loads(msg)
self.results.append((msg_dict['result'], msg_dict['jidx']))
# if we're all done, let the client know
if len(self.jobs) == 0:
job = None
jidx = -1
encoded = ""
else:
# get new job for client to process
jidx = random.randint(0, len(self.jobs) - 1)
job = self.jobs[jidx]
del self.jobs[jidx]
# get file
with open(self.files[jidx], 'r') as f:
filecontents = f.read()
encoded = base64.b64encode(filecontents)
# create dict object to send to client
response_msg = {
"job" : job,
"index" : jidx,
"file" : encoded
}
return defer.succeed(response_msg)
# args for factory
files = ['test.txt', 'test.txt', 'test.txt']
jobs = ["4*4-5", "2**2-5", "2/9*2/3"]
application = service.Application('jobservice')
factory = JobServerFactory(jobs=jobs, files=files)
internet.TCPServer(12345, factory).setServiceParent(
service.IServiceCollection(application))
JobClient.py
from twisted.internet import reactor, protocol
from twisted.protocols.basic import Int32StringReceiver
import json
import time
from logger import JobLogger
class JobClientProtocol(Int32StringReceiver):
log = JobLogger("client.log")
def stringReceived(self, msg):
# unpack job from server
server_msg_dict = json.loads(msg)
job = server_msg_dict["job"]
index = server_msg_dict["index"]
filestring = server_msg_dict["file"]
if index == -1:
# we're done with all tasks
self.transport.loseConnection()
self.log.write("Recieved job %d from server with file '%s'" % (index, filestring))
# do something with file
# job from the server...
time.sleep(5)
result = { "a" : 1, "b" : 2, "c" : 3}
result_msg = { "result" : result, "jidx" : index }
self.log.write("Completed job %d from server with result '%s'" % (index, result))
# serialize and tell server
result_str = json.dumps(result_msg)
self.transport.write(encoded_str)
def lengthLimitExceeded(self, msg):
self.transport.loseConnection()
class JobClientFactory(protocol.ClientFactory):
def buildProtocol(self, addr):
p = JobClientProtocol()
p.factory = self
return p
reactor.connectTCP("127.0.0.1", 12345, JobClientFactory())
reactor.run()
logging.py
class JobLogger(object):
def __init__(self, filename):
self.log = open(filename, 'a')
def write(self, string):
self.log.write("%s\n" % string)
def close(self):
self.log.close()
Running, testing locally with only one client:
$ twistd -y JobServer.py -l ./jobserver.log --pidfile=./jobserver.pid
$ python JobClient.py
Problems I'm having:
In general, I hope these protocols ensure that operations on either side can take any amount of time, but perhaps I didn't design that correctly.
The client and server .log files don't get written to reliably - sometimes not until after I kill the process.
If you want bytes to appear on disk in a timely manner, you may need to call flush
on your file object.
The protocol gets stuck after the client connects and the server sends back a message. The message seemingly never gets to the client.
The server doesn't send int32 strings to the client: it calls transport.write
directly. The client gets confused because these end up looking like extremely long int32 strings. For example, the first four bytes of "Internal server error" decode as the integer 1702129225 so if there is an error on the server and these bytes are sent to the client, the client will wait for roughly 2GB of data before proceeding.
Use Int32StringReceiver.sendString
instead.