Search code examples
python http network-programming websocket tcp

Building a persistent HTTP webserver


My inner while loop containing `msg = client_socket.recv(1024).decode()` is unable to handle multiple client requests without having `client_socket, addr = server_socket.accept()` inside the loop. I'm not sure how I can keep reusing the same connection for all the requests.

from socket import *
import sys

def run_server(port: int):
    """Serve local files over HTTP on ('localhost', port), one connection at a time.

    For every accepted connection the inner loop reads a request, takes the
    second whitespace-separated token of its first line as the file name
    (with every '/' removed), and sends back the file's bytes or a 404 page.

    Args:
        port: TCP port number to bind on 'localhost'.
    """
    # Create server socket
    server_socket = socket(AF_INET, SOCK_STREAM)
    # Bind the socket to a specific address
    server_socket.bind(('localhost', port))

    # Argument 1 represents the maximum number of queued connections
    server_socket.listen(1)
    print(f"Serving on port {port}")

    while 1: 
        # Accept incoming connection from client
        client_socket, addr = server_socket.accept()
        print(f"Connection established from address {addr}")

        while 1:    
            # Receive up to 1024 bytes from the client and decode them as text.
            # NOTE(review): an empty message is not checked for; if msg is
            # empty (or its first line has fewer than two tokens) the [1]
            # index below raises IndexError.
            msg = client_socket.recv(1024).decode()
            # Extract file name: second token of the first line, all '/' removed
            file_name = msg.split('\n')[0].split()[1].replace('/', '')

            # Process client requests
            try:
                with open(file_name, 'rb') as file:
                    status_line = "HTTP/1.1 200 OK\r\n"

                    file_extension = file_name.split('.')[-1].lower()

                    # Only 'html' and 'png' assign content_type_header; for any
                    # other extension it is not assigned in this iteration.
                    if file_extension == 'html':
                        content_type_header = "Content-Type: text/html\r\n\r\n"
                    elif file_extension == 'png':
                        content_type_header = "Content-Type: image/png\r\n\r\n"

                    content = file.read()

                    # Combine status line, headers and content.
                    # No Content-Length header is included in the response.
                    http_response = (status_line + "Connection: keep-alive\r\n" + content_type_header).encode() + content
            except OSError as _:
                # File could not be opened or read, generate a 404 response
                status_line = "HTTP/1.1 404 Not Found\r\n"
                content_type_header = "Content-Type: text/html\r\n\r\n"
                content = "<h1>404 Not Found</h1><p>The requested file was not found on this server.</p>"

                http_response = (status_line + content_type_header + content).encode()

            # Send HTTP response back to client
            client_socket.sendall(http_response)
        # NOTE(review): the inner loop above contains no break, so this close
        # is not reached by normal control flow.
        client_socket.close()
        print("Connection closed")

    # Not reached by normal control flow: the outer loop has no break either.
    server_socket.close()

if __name__ == "__main__":
    # Everything after the script name; exactly one value (the port) is expected.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python WebServer.py <port>")
        sys.exit(1)

    # Convert the single command line argument to the port number and serve.
    port = int(cli_args[0])
    run_server(port)

Solution

  • I assume you are doing this to learn since implementing the HTTP protocol is complex.

    TCP is a byte streaming protocol and you will need to follow a higher-level protocol such as HTTP 1.0 [RFC 1945] to determine how to read the stream and how much to read. Since HTTP requests contain \r\n-terminated lines for the request and headers and a blank line to indicate end-of-headers, wrapping the TCP socket in a file-like object via socket.makefile makes it easier to process the request.

    To implement keep-alive, the response headers must include a Content-Length header so the client knows how much content to read before the next response begins on the same connection.

    Below I've made minimal changes to support your implementation. It is by no means complete. Make a root directory in the same directory as the script below and populate it with a few HTML and PNG files like index.html or icon.png. Point a browser to http://localhost:8080/index.html or http://localhost:8080/icon.png and the files should display in the browser. At least they did for me on Chrome and Edge 😊.

    import socket
    import sys
    import os
    from pprint import pprint
    
    def _resolve_path(root, url_path):
        """Map a request path to an absolute file name inside *root*.

        Returns None when the path would resolve outside *root* (for example
        '/../secret.txt') or cannot be interpreted as a file name at all.
        """
        try:
            root = os.path.realpath(root)
            candidate = os.path.realpath(os.path.join(root, url_path.lstrip('/')))
            inside = os.path.commonpath([root, candidate]) == root
        except ValueError:
            # Embedded NUL bytes, or paths on different drives on Windows.
            return None
        return candidate if inside else None

    def _build_response(status, content, content_type=None, keep_alive=True):
        """Assemble a full HTTP/1.1 response (status line, headers, blank line, body) as bytes."""
        lines = [b'HTTP/1.1 ' + status]
        if content_type is not None:
            lines.append(b'Content-Type: ' + content_type)
        lines.append(b'Connection: keep-alive' if keep_alive else b'Connection: close')
        # Content-Length is what lets the client find the end of this response
        # and reuse the connection for the next request.
        lines.append(f'Content-Length: {len(content)}'.encode('ascii'))
        return b'\r\n'.join(lines) + b'\r\n\r\n' + content

    def _serve_connection(client_socket, infile):
        """Answer requests on one connection until the client closes it or sends an unparsable request."""
        content_types = {'html': b'text/html', 'png': b'image/png'}

        while True:
            # Receive HTTP request line; an empty read means the client closed the connection.
            line = infile.readline()
            if not line:
                break
            # iso-8859-1 maps every byte value, so a stray non-ASCII byte cannot raise here.
            request = line.decode('iso-8859-1')
            print(request)

            # Read headers line-by-line and add to dictionary
            headers = {}
            while line := infile.readline():
                if line == b'\r\n':
                    break
                key, _, value = line.decode('iso-8859-1').partition(':')
                headers[key.strip()] = value.strip()
            pprint(headers)

            try:
                op, path, protocol = request.split()
            except ValueError:
                # The stream can no longer be trusted to be in sync: answer 400 and hang up.
                client_socket.sendall(_build_response(
                    b'400 Bad Request', b'<h1>400 Bad Request</h1>',
                    b'text/html', keep_alive=False))
                break
            print(op, path, protocol)

            # Look under a directory called root below the current working directory,
            # refusing any path that resolves outside of it.
            file_name = _resolve_path('root', path)
            print(file_name)

            # minimally handles a GET for html and png files.
            try:
                if file_name is None:
                    raise FileNotFoundError(path)
                with open(file_name, 'rb') as file:
                    content = file.read()
            except OSError:
                # Missing file, directory, or unreadable file: generate a 404 response
                status = b'404 Not Found'
                content_type = b'text/html'
                content = b'<h1>404 Not Found</h1><p>The requested file was not found on this server.</p>'
            else:
                status = b'200 OK'
                # Unknown extensions are sent without a Content-Type header.
                content_type = content_types.get(file_name.split('.')[-1].lower())

            # Send keepalive, the length of the content, blank line and content.
            client_socket.sendall(_build_response(status, content, content_type))

    def run_server(port):
        """Serve files from ./root over HTTP on ('localhost', port) with keep-alive.

        Connections are handled one at a time; each connection may carry any
        number of requests. Requests that resolve outside ./root, name a
        directory, or name an unreadable file get a 404; an unparsable request
        line gets a 400 and the connection is closed. This function loops
        forever and only ends through an exception or interrupt.

        Args:
            port: TCP port number to bind on 'localhost'.
        """
        with socket.socket() as server_socket:
            server_socket.bind(('localhost', port))
            server_socket.listen()
            print(f'Serving on port {port}')

            while True:
                client_socket, addr = server_socket.accept()
                # wrap client socket in a file-like object.  Gives access to .readline().
                with client_socket, client_socket.makefile('rb') as infile:
                    print(f'{addr}: connected')
                    try:
                        _serve_connection(client_socket, infile)
                    except OSError as e:
                        # A client that resets the connection must not take the server down.
                        print(f'{addr}: connection error: {e}')
                    print(f'{addr}: disconnected')
    
    if __name__ == '__main__':
        # Exactly one argument is expected: the port to listen on.
        if len(sys.argv) != 2:
            print('Usage: python WebServer.py <port>')
            sys.exit(1)

        # Get port number from command line parameter; a non-numeric value is
        # reported as a usage error instead of an uncaught ValueError traceback.
        try:
            port = int(sys.argv[1])
        except ValueError:
            print('Usage: python WebServer.py <port>')
            sys.exit(1)
        run_server(port)