I am implementing my own http server:
import socket
import threading
import queue
import ssl
from manipulator.parser import LineBuffer,LoggableHttpRequest
class SocketServer:
    """
    Basic threaded socket server in Python.

    The thread that calls start() accepts connections and puts them on a
    queue; ``max_threads`` daemon worker threads take connections off that
    queue, read the first request line and answer with a fixed HTTP/1.1
    response before closing the connection.
    """

    def __init__(self, host, port, max_threads, ssl_context: ssl.SSLContext = None, db=None):
        """
        Create the listening socket (it is bound later, in start()).

        :param host: address the server binds to
        :param port: TCP port the server binds to
        :param max_threads: number of worker threads started by start()
        :param ssl_context: optional SSL context; when given, every accepted
            connection is wrapped with it (server side)
        :param db: optional object passed to every request object created by
            the workers; defaults to None
        """
        print("Create Server For Http")
        self.host = host
        self.port = port
        self.server_socket = self.initSocket()
        self.max_threads = max_threads
        self.request_queue = queue.Queue()
        # The workers read self.db for every request, so it must always exist.
        self.db = db
        self.ssl_context = None
        if ssl_context is not None:
            print("Initialise SSL context")
            self.ssl_context = ssl_context

    def initSocket(self):
        """Return a new, unbound IPv4 TCP socket."""
        return socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    def __accept(self):
        """Accept connections forever and queue them for the worker threads."""
        self.server_socket.listen(5)
        while True:
            client_socket = None
            try:
                client_socket, client_address = self.server_socket.accept()
                if self.ssl_context is not None:
                    print(self.ssl_context)
                    # NOTE(review): with the default wrap_socket() settings the
                    # TLS handshake happens here, on the accepting thread.
                    client_socket = self.ssl_context.wrap_socket(client_socket, server_side=True)
                self.request_queue.put((client_socket, client_address))
            except OSError as exc:
                # Socket and TLS errors (ssl.SSLError is an OSError) are
                # reported and the loop keeps serving. KeyboardInterrupt and
                # SystemExit are deliberately NOT caught, so the server can
                # still be stopped.
                print("Error Occured", exc)
                if client_socket is not None:
                    # Do not leak a connection that could not be queued.
                    client_socket.close()
                if self.server_socket.fileno() < 0:
                    # The listening socket was closed: accept() can only keep
                    # failing, so stop instead of spinning.
                    return

    def __handle(self):
        """Worker loop: serve queued connections one at a time, forever."""
        while True:
            client_socket, address = self.request_queue.get()
            print("Address", address)
            try:
                # Read HTTP Request
                # Log Http Request
                # Manipulate Http Request
                # Forward or respond
                buffer = LineBuffer()
                # NOTE(review): HttpRequest is not among this file's imports
                # (LoggableHttpRequest is) and the HttpRequest class shown
                # alongside has no parse() method -- confirm which class is
                # meant here.
                request = HttpRequest(self.db)
                buffer.pushData(client_socket.recv(2048))
                line = buffer.getLine()
                if line is not None:
                    request.parse(line)
                content = '<html><body>Hello World</body></html>\r\n'.encode()
                headers = f'HTTP/1.1 200 OK\r\nContent-Length: {len(content)}\r\nContent-Type: text/html\r\n\r\n'.encode()
                client_socket.sendall(headers + content)
            except Exception as exc:
                # Thread boundary: one failing connection must not kill the
                # worker, otherwise the pool silently shrinks to zero.
                print("Error while handling request from", address, exc)
            finally:
                try:
                    client_socket.shutdown(socket.SHUT_RDWR)
                except OSError:
                    # The peer already closed or reset the connection.
                    pass
                finally:
                    client_socket.close()
                    self.request_queue.task_done()

    def __initThreads(self):
        """Start ``max_threads`` daemon worker threads running __handle."""
        for _ in range(self.max_threads):
            threading.Thread(target=self.__handle, daemon=True).start()

    def start(self):
        """Bind the listening socket, start the workers and accept forever."""
        self.server_socket.bind((self.host, self.port))
        self.__initThreads()
        self.__accept()
The reason I do this is that I want to log and analyze incoming HTTP requests as fast as possible. Also, many third-party libraries require C bindings, which I want to avoid.
So far, I have written a line chunker that splits the request into lines at each \r\n:
class LineBuffer:
    """Accumulates raw bytes and hands them back one CRLF-terminated line at a time."""

    def __init__(self):
        # Bytes received so far that have not been returned by getLine() yet.
        self.buffer = b''

    def pushData(self, line):
        """
        Append received data to the buffer.

        :param line: the data to append; ``bytes``-like objects (what
            ``socket.recv`` returns) are appended as they are, ``str`` is
            encoded first
        """
        if isinstance(line, str):
            # Kept for callers that pass text; str.encode() on the bytes
            # returned by recv() raises TypeError.
            line = line.encode()
        self.buffer += bytes(line)

    def getLine(self):
        """
        Remove and return the first complete line from the buffer.

        :return: the line including its trailing ``\\r\\n``, or ``None`` when
            the buffer does not contain a complete line yet
        """
        if b'\r\n' in self.buffer:
            line, sep, self.buffer = self.buffer.partition(b'\r\n')
            return line + sep
        return None
And I want to parse each line and serialize it into an object representing an HTTP request, so I can pipe it further in a streaming manner:
class HttpRequest:
    """Plain container for the pieces of a single HTTP request."""

    def __init__(self, db):
        """
        Create an empty request.

        :param db: accepted but neither stored nor used by this constructor
        """
        self.headers = dict()  # parsed headers
        self.body = str()  # HTTP body
        # Not known until the request has been parsed.
        self.version = self.method = self.id = None
        self.raw = str()
class HttpParser:
    """Line-by-line HTTP parser (the parsing itself is not implemented yet)."""

    def __init__(self, db):
        """
        :param db: object stored on the parser as ``self.db``
        """
        self.db = db
        # Request currently being assembled; None until parsing starts.
        self.currentRequest = None

    def parse(self, line):
        """
        Parse one line of an HTTP request.

        Placeholder: currently does nothing and returns ``None``.

        :param line: one line of the request
        """
        # `self` was missing from the signature, so every call on an instance
        # raised TypeError before reaching the body.
        # do parsing here
        return
What worries me most is the scenario in which a client sends 2 requests:
Request 1:
GET / HTTP/1.1\r\n
HOST lala1.com \r\n
Request 2:
POST /file HTTP/1.1\r\n
HOST lala2.com \r\n
\r\n
Qm9QUVM5NDMuLnEvXVN7O2E=
fDMpQjcpOlFodClgOGUzYQ==
NVgvNipmU1d3YFgtLFUhQiM=
MiZwSk0zKno9TkVxNyZFL3s=
NEhGJXZ7OGciOE8mYF5JNA==
dVlJLzpdKlUjXl4tcEpufQ==
XVgiXCdjQyckMjY/Ikt6Rw==
alksJlZ+XHFzQSYqaHlHIztt
YiRnPjdye0gvanV3ZGxaZkI=
MjgwTX0uYHw6M295RS52UDM=
YU0yQ2dQLmJUQVpCNS89PWJB
Ti10MHJBTjAqUFUlIU0sMyRN
But the sequence in which my server receives them is:
GET / HTTP/1.1\r\n
POST /file HTTP/1.1\r\n
HOST lala1.com \r\n
\r\n\r\nQm9QUVM5ND
HOST lala2.com \r\n
MuLnEvXVN7O2E=
fDMpQjcpOlFodClgOGUzYQ==
NVgvNipmU1d3YFgtLFUhQiM=
MiZwSk0zKno9TkVxNyZFL3s=
NEhGJXZ7OGciOE8mYF5JNA==
dVlJLzpdKlUjXl4tcEpufQ==
XVgiXCdjQyckMjY/Ikt6Rw==
alksJlZ+XHFzQSYqaHlHIztt
YiRnPjdye0gvanV3ZGxaZkI=
MjgwTX0uYHw6M295RS52UDM=
YU0yQ2dQLmJUQVpCNS89PWJB
Ti10MHJBTjAqUFUlIU0sMyRN
\r\n
Is this a feasible scenario in my case? Or does the TCP socket preserve the data order by itself?
With HTTP/1 requests and responses are serialized, i.e. there is no interleaving of multiple requests or responses within the same TCP connection and the responses must be in the same order and on the same TCP connection as the requests.
With HTTP/2 this is different in that requests and responses are split into frames inside the same TCP connection and these frames can be interleaved. So multiple requests and responses can be transmitted at the same time and the order of responses does not need to match the order of requests. But your current code expects HTTP/1 only, i.e. it does not even attempt to parse the very different format of HTTP/2 and writes only HTTP/1 responses.
For all the details of the protocols see the relevant standards.