Search code examples
pythonhttpsocketsservertcp

Should I expect the HTTP data to arrive out of order in my HTTP server for requests coming from a single client socket?


I am implementing my own http server:

import socket
import threading
import queue
import ssl
from manipulator.parser import LineBuffer,LoggableHttpRequest

class SocketServer:
    """
        Basic threaded TCP socket server in python.

        The thread that calls ``start`` accepts connections and puts them on
        ``request_queue``; ``max_threads`` daemon worker threads take the
        connections off the queue, read one chunk of the request and answer
        with a fixed HTTP/1.1 response.
    """

    def __init__(self,host,port,max_threads,ssl_context:ssl.SSLContext=None,db=None):
        """
        Create the listening socket and the connection queue.

        :param host: address the server binds to in ``start``.
        :param port: port the server binds to in ``start``.
        :param max_threads: number of worker threads started by ``start``.
        :param ssl_context: optional context; when given, every accepted
            connection is wrapped with it (server side).
        :param db: optional object handed to each request object created by
            the workers. Defaults to ``None``; previously the attribute was
            never set, so every request failed with ``AttributeError``.
        """
        print("Create Server For Http")

        self.host = host
        self.port = port
        self.server_socket = self.initSocket()
        self.max_threads = max_threads
        self.request_queue = queue.Queue()
        self.db = db

        self.ssl_context = ssl_context
        if ssl_context is not None:
            print("Initialise SSL context")

    def initSocket(self):
        """Return a new, unbound IPv4 TCP socket."""
        return socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    @staticmethod
    def _close_client(client_socket):
        """
        Shut down and close ``client_socket`` without raising ``OSError``
        from ``shutdown``.

        ``shutdown`` raises ``OSError`` when the socket is not connected
        (for example the peer already dropped the connection); that must not
        prevent ``close`` from running.
        """
        try:
            client_socket.shutdown(socket.SHUT_RDWR)
        except OSError:
            # Nothing left to shut down; closing below is still required.
            pass
        finally:
            client_socket.close()

    def __accept(self):
        """
        Listen on the server socket and queue accepted connections forever.

        Errors raised while accepting or while wrapping a connection in TLS
        are printed and the loop continues with the next connection.
        """
        self.server_socket.listen(5)
        while True:
            try:
                client_socket, client_address = self.server_socket.accept()

                if self.ssl_context is not None:
                    print(self.ssl_context)
                    client_socket = self.ssl_context.wrap_socket(client_socket, server_side=True)

                self.request_queue.put((client_socket, client_address))
            except Exception as exc:
                # ``Exception`` (not a bare ``except``) so that Ctrl-C /
                # SystemExit can still stop the accept loop.
                print("Error Occured", exc)

    def __handle(self):
        """
        Worker loop: take a connection from the queue, read the request,
        send the response, then close the connection.

        A failure while serving one connection is printed and the worker
        moves on to the next connection instead of terminating.
        """
        while True:
            client_socket, address = self.request_queue.get()
            print("Address",address)

            try:
                # Read HTTP Request
                # Log Http Request
                # Manipulate Http Request
                # Forward or respond

                # NOTE(review): ``LineBuffer`` and ``HttpRequest`` must be
                # resolvable at module level, and the request object must
                # provide ``parse(line)`` - confirm against the parser module.
                buffer = LineBuffer()
                request = HttpRequest(self.db)

                # Only a single recv() is issued, so only the first complete
                # line of whatever arrived in that chunk is parsed.
                buffer.pushData(client_socket.recv(2048))
                line = buffer.getLine()
                if line is not None:
                    request.parse(line)

                content = '<html><body>Hello World</body></html>\r\n'.encode()
                headers = f'HTTP/1.1 200 OK\r\nContent-Length: {len(content)}\r\nContent-Type: text/html\r\n\r\n'.encode()
                client_socket.sendall(headers + content)
            except Exception as exc:
                # Keep the worker alive; otherwise each failing request would
                # permanently remove one thread from the pool.
                print("Error Occured", exc)
            finally:
                try:
                    self._close_client(client_socket)
                finally:
                    # Always mark the item done so queue.join() cannot hang.
                    self.request_queue.task_done()

    def __initThreads(self):
        """Start ``max_threads`` daemon threads running the worker loop."""
        for _ in range(self.max_threads):
            threading.Thread(target=self.__handle, daemon=True).start()

    def start(self):
        """
        Bind the server socket, start the workers and accept connections.

        This call does not return: the accept loop runs in the calling thread.
        """
        self.server_socket.bind((self.host, self.port))
        self.__initThreads()
        self.__accept()

And the reason why I do this is that I want to log and analyze incoming HTTP requests as fast as possible. Also, many 3rd-party libraries require C bindings, which I want to avoid.

So far, I have made a line chunker that splits the request on \r\n:

class LineBuffer:
    """
    Accumulate raw data and hand it back one CRLF-terminated line at a time.
    """

    def __init__(self):
        # Bytes received so far that have not yet been returned by getLine().
        self.buffer = b''

    def pushData(self,line):
        """
        Append ``line`` to the buffer.

        :param line: ``bytes``-like data (e.g. the result of ``socket.recv``)
            or a ``str``, which is encoded with the default UTF-8 codec.
            Previously only ``str`` worked; passing ``bytes`` raised
            ``TypeError`` because ``str.encode`` was applied unconditionally.
        """
        if isinstance(line, str):
            line = line.encode()
        self.buffer += bytes(line)

    def getLine(self):
        """
        Remove and return the first buffered line, including its ``\\r\\n``.

        :return: the line as ``bytes``, or ``None`` when the buffer holds no
            complete line yet (the buffer is left untouched in that case).
        """
        line, sep, rest = self.buffer.partition(b'\r\n')
        if not sep:
            return None
        self.buffer = rest
        return line + sep

And I want to parse each line and serialize it into an object representing an HTTP request so I can pipe it further in a streaming manner:

class HttpRequest:
    """
    Plain container for the pieces of one HTTP request.

    Every instance starts out empty: no headers, empty body and raw text,
    and no version, method or id.
    """

    def __init__(self,db):
        # ``db`` is accepted but neither stored nor used by this constructor.
        # headers: parsed headers; body: HTTP body
        self.headers, self.body = {}, ""
        self.version = self.method = self.id = None
        self.raw = ""

class HttpParser:
    """
    Skeleton of a line-oriented HTTP parser.

    Holds the ``db`` object it was created with and a slot for the request
    currently being assembled.
    """

    def __init__(self,db):
        self.db = db
        # Request currently being built; nothing is in progress initially.
        self.currentRequest=None

    def parse(self,line):
        """
        Parse one line of an HTTP request.

        Placeholder: the parsing logic is not implemented yet, so the call
        always returns ``None``.

        :param line: one line of the request.
        """
        # ``self`` was missing from the signature, so calling
        # ``parser.parse(line)`` on an instance raised ``TypeError``.
        # do parsing here
        return

What worries me most is the scenario in which a client sends 2 requests:

Request 1:

GET / HTTP/1.1\r\n
HOST lala1.com \r\n

Request 2:

POST /file HTTP/1.1\r\n
HOST lala2.com \r\n
\r\n
Qm9QUVM5NDMuLnEvXVN7O2E=
fDMpQjcpOlFodClgOGUzYQ==
NVgvNipmU1d3YFgtLFUhQiM=
MiZwSk0zKno9TkVxNyZFL3s=
NEhGJXZ7OGciOE8mYF5JNA==
dVlJLzpdKlUjXl4tcEpufQ==
XVgiXCdjQyckMjY/Ikt6Rw==
alksJlZ+XHFzQSYqaHlHIztt
YiRnPjdye0gvanV3ZGxaZkI=
MjgwTX0uYHw6M295RS52UDM=
YU0yQ2dQLmJUQVpCNS89PWJB
Ti10MHJBTjAqUFUlIU0sMyRN

But the sequence in which my server receives them is:

GET / HTTP/1.1\r\n
POST /file HTTP/1.1\r\n
HOST lala1.com \r\n
\r\n\r\nQm9QUVM5ND
HOST lala2.com \r\n
MuLnEvXVN7O2E=
fDMpQjcpOlFodClgOGUzYQ==
NVgvNipmU1d3YFgtLFUhQiM=
MiZwSk0zKno9TkVxNyZFL3s=
NEhGJXZ7OGciOE8mYF5JNA==
dVlJLzpdKlUjXl4tcEpufQ==
XVgiXCdjQyckMjY/Ikt6Rw==
alksJlZ+XHFzQSYqaHlHIztt
YiRnPjdye0gvanV3ZGxaZkI=
MjgwTX0uYHw6M295RS52UDM=
YU0yQ2dQLmJUQVpCNS89PWJB
Ti10MHJBTjAqUFUlIU0sMyRN
\r\n

Is this a feasible scenario in my case? Or does the TCP socket handle the data ordering by itself?


Solution

  • With HTTP/1 requests and responses are serialized, i.e. there is no interleaving of multiple requests or responses within the same TCP connection and the responses must be in the same order and on the same TCP connection as the requests.

    With HTTP/2 this is different in that requests and responses are split into frames inside the same TCP connection and these frames can be interleaved. So multiple requests and responses can be transmitted at the same time and the order of responses does not need to match the order of requests. But your current code expects HTTP/1 only, i.e. it does not even attempt to parse the very different format of HTTP/2 and writes only HTTP/1 responses.

    For all the details of the protocols see the relevant standards.