I have a simple web server written in C that works fine when serving a small HTML file with a few images to a browser. When I try to serve a more complex website with a lot more objects with different content types, like css and js files, I found that I do not receive requests for many of the objects needed to properly load index.html - the browser stays waiting for host indefinitely. If I refresh the page a couple of times, eventually everything loads correctly and I'm able to follow the hyperlinks. Another thing I've noticed is that it's usually the same files that are not being sent back to the browser.
#include <sys/socket.h>
#include <sys/sendfile.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <stdbool.h>
#include <pthread.h>
bool writeDataToClient(int sckt, const void *data, int datalen)
{
const char *pdata = (const char*) data;
while (datalen > 0){
int numSent = send(sckt, pdata, datalen, 0);
if (numSent <= 0){
if (numSent == 0){
printf("The client was not written to: disconnected\n");
} else {
perror("The client was not written to");
}
return false;
}
pdata += numSent;
datalen -= numSent;
}
return true;
}
bool writeStrToClient(int sckt, const char *str)
{
return writeDataToClient(sckt, str, strlen(str));
}
int get_filename_and_method(char *str, char **buf1, char **buf2)
{
char *request = str;
char *status_line;
char *url;
char *token = strtok(request, "\r\n");
status_line = token;
*buf1 = strtok(status_line, " ");
if (strcasecmp(*buf1, "GET") != 0) return -1;
url = strtok(NULL, " ");
if (strncmp(url, "/", strlen("/")) != 0) return -1;
if (strlen(url) == 1) strcat(url, "index.html");
if (url[strlen(url) - 1] == '/') strcat(url, "index.html");
char *tmp = strdup(url);
strcpy(url, "web");
strcat(url, tmp);
*buf2 = url;
free(tmp);
return 0;
}
int get_connection_type(char *str, char **buf)
{
char *req = str;
char *token = strtok(req, "\r\n");
char *connection;
while (token != NULL)
{
if (strncmp(token, "Connection:", 11) == 0)
{
connection = token;
strtok(connection, " ");
if (strcasecmp(strtok(NULL, " "), "Keep-Alive") == 0)
{
*buf = "Connection: keep-alive\r\n\r\n";
return 0;
}
}
token = strtok(NULL, "\r\n");
}
*buf = "Connection: close\r\n\r\n";
return 0;
}
void *connection_handler (void *sockfd)
{
// Connection handler
int sock = *(int*)sockfd;
char *buffer, *method, *filename, *connection_type, *content_type;
int bufsize = 2048;
const char *HTTP_404_CONTENT = "<html><head><title>404 Not "
"Found</title></head><body><h1>404 Not Found</h1>The requested "
"resource could not be found but may be available again in the "
"future."</body></html>";
const char *HTTP_501_CONTENT = "<html><head><title>501 Not "
"Implemented</title></head><body><h1>501 Not Implemented</h1>The "
"server either does not recognise the request method, or it lacks "
"the ability to fulfill the request.</body></html>";
buffer = (char*) malloc(bufsize);
if (!buffer){
printf("The receive buffer was not allocated\n");
exit(1);
}
while (1)
{
int numRead = recv(sock, buffer, bufsize, 0);
if (numRead < 1){
if (numRead == 0){
printf("The client was not read from: disconnected\n");
break;
} else {
perror("The client was not read from");
break;
}
close(sock);
continue;
}
printf("%.*s\n", numRead, buffer);
// Extract info from request header
get_connection_type(buffer, &connection_type);
if (get_filename_and_method(buffer, &method, &filename) == -1)
{
char clen[40];
writeStrToClient(sock, "HTTP/1.1 501 Not Implemented\r\n");
sprintf(clen, "Content-length: %zu\r\n", strlen(HTTP_501_CONTENT));
writeStrToClient(sock, clen);
writeStrToClient(sock, "Content-Type: text/html\r\n");
writeStrToClient(sock, connection_type);
writeStrToClient(sock, HTTP_501_CONTENT);
}
else
{
// Open and read file
long fsize;
FILE *fp = fopen(filename, "rb");
if (!fp){
perror("The file was not opened");
char clen[40];
writeStrToClient(sock, "HTTP/1.1 404 Not Found\r\n");
sprintf(clen, "Content-length: %zu\r\n", strlen(HTTP_404_CONTENT));
writeStrToClient(sock, clen);
writeStrToClient(sock, "Content-Type: text/html\r\n");
writeStrToClient(sock, connection_type);
writeStrToClient(sock, HTTP_404_CONTENT);
if (strcmp(connection_type, "Connection: close\r\n\r\n") == 0)
break;
continue;
}
printf("The file was opened\n");
if (fseek(fp, 0, SEEK_END) == -1){
perror("The file was not seeked");
exit(1);
}
fsize = ftell(fp);
if (fsize == -1) {
perror("The file size was not retrieved");
exit(1);
}
rewind(fp);
char *msg = (char*) malloc(fsize);
if (!msg){
perror("The file buffer was not allocated\n");
exit(1);
}
if (fread(msg, fsize, 1, fp) != 1){
perror("The file was not read\n");
exit(1);
}
fclose(fp);
// Get extension of filename
char *ext = strrchr(filename, '.');
if (ext != NULL)
ext++;
if (strcmp(ext, "html") == 0 || strcmp(ext, "htm") == 0)
content_type = "Content-Type: text/html\r\n";
else if (strcmp(ext, "css") == 0)
content_type = "Content-Type: text/css\r\n";
else if (strcmp(ext, "jpg") == 0)
content_type = "Content-Type: image/jpeg\r\n";
else if (strcmp(ext, "png") == 0)
content_type = "Content-Type: image/png\r\n";
else if (strcmp(ext, "gif") == 0)
content_type = "Content-Type: image/gif\r\n";
else
content_type = "Content-Type: text/plain\r\n";
if (!writeStrToClient(sock, "HTTP/1.1 200 OK\r\n")){
close(sock);
continue;
}
char clen[40];
sprintf(clen, "Content-length: %ld\r\n", fsize);
if (!writeStrToClient(sock, clen)){
printf("Cannot write content length\n");
close(sock);
continue;
}
if (!writeStrToClient(sock, content_type)){
close(sock);
continue;
}
if (!writeStrToClient(sock, connection_type) == -1){
close(sock);
continue;
}
if (!writeDataToClient(sock, msg, fsize)){
close(sock);
continue;
}
printf("The file was sent successfully\n");
}
if (strcmp(connection_type, "Connection: close\r\n\r\n") == 0)
break;
}
close(sock);
pthread_exit(0);
}
int main(int argc, char *argv[]){
int create_socket, new_socket;
struct sockaddr_in address;
socklen_t addrlen;
char *ptr;
if (argc != 2)
{
printf("Usage: %s <port number>\n", argv[0]);
exit(0);
}
create_socket = socket(AF_INET, SOCK_STREAM, 0);
if (create_socket == -1){
perror("The socket was not created");
exit(1);
}
printf("The socket was created\n");
const unsigned short port = (unsigned short) strtol(argv[1], &ptr, 10);
memset(&address, 0, sizeof(address));
address.sin_family = AF_INET;
address.sin_addr.s_addr = INADDR_ANY;
address.sin_port = htons(port);
if (bind(create_socket, (struct sockaddr *) &address, sizeof(address)) == -1){
printf("The socket was not bound because that port is not available\n");
exit(1);
}
printf("The socket is bound\n");
if (listen(create_socket, 10) == -1){
perror("The socket was not opened for listening");
exit(1);
}
printf("The socket is listening\n");
while (1) {
addrlen = sizeof(address);
pthread_t tid;
new_socket = accept(create_socket, (struct sockaddr *) &address, &addrlen);
if (new_socket == -1) {
perror("A client was not accepted");
exit(1);
}
printf("A client is connected from %s:%hu...\n", inet_ntoa(address.sin_addr), ntohs(address.sin_port));
if (pthread_create(&tid, NULL, connection_handler, (void *)&new_socket) < 0)
{
perror("Could not create thread");
return 1;
}
pthread_join(tid, NULL);
}
if (new_socket < 0)
{
perror("accept failed");
return 1;
}
close(create_socket);
printf("Socket was closed\n");
return 0;
}
Also, closing the browser (disconnecting from the server) causes another connection to be accepted which sends the first file that was requested by the browser but was not received and then the server program ends without any error message.
UPDATE: Removing pthread_join allowed for the page to load properly. As a user mentioned, the browser executes several connections in parallel so what I think was happening was that all of the requests were being sent through multiple connections (looking at my program's output, it appears that there were 5 connections to the server). As pthread_join waits for a thread (connection) to finish, only one connection was handled at a time which is why I wasn't receiving all of the requests.
HTTP might be more complex than you think. Have you read its specification in full (RFC 7230 for HTTP 1.1) , or some book about HTTP ? Did you consider using some HTTP server library, such as libonion or libhttp (or libmicrohttpd or others)? The size of these libraries tells something about the complexity of HTTP! And these libraries are free software, so you can study their source code and take inspiration from them. (Your teacher should be delighted if you tell him honestly that you studied the source code of e.g. libonion
and have read RFC 7230).
BTW, modern browsers (recent Firefox or Chrome, etc...) tend to use several connections in parallel to display one single page. And modern browsers are capable to show you the actual HTTP traffic and network protocols.
My recommendation is to use some existing library. I am quite happy using libonion
, even if it has some limitations.
At last, read how to debug small programs. Enable all warnings and debug info (so compile using gcc -Wall -Wextra -g
with a recent GCC, e.g. GCC 8 in end of 2018). Learn how to debug with GDB (and use also a recent one, GDB 8.2 in end of 2018). Use also valgrind and perhaps clang-analyzer.
I have a simple web server written in C
This is a contradiction in terms. A web server either cannot be simple, or does not implement all of HTTP.
Your use of sprintf
is dangerous (risk of buffer overflow). I strongly recommend using snprintf instead. And your 404 handling looks really bad.
A different question (and I don't know its answer) is if in your particular case, with some particular browser clients, you might implement a small fraction of HTTP (enough for your browsers and clients) for your particular case .