So I'm doing some benchmarking of TCP to study the impact of the amount of data transferred per connection over the resulting bandwidth. So I wrote a server and a client in C to measure what I need. I then used a Python script to run the experiments many times (To have a precision of +/- 1% I ran my test for 100s. For each point of data I ran the experiment 33 times to get a decent average.), with different inputs, and to gather the results.
The results I get seem about right (I can even observe the expected plateau at higher amounts of data transferred per connection) with the exception that the bandwidth is only 10% of what it should be...
Because I only have access to one computer to run this benchmark, I'm doing the tests on localhost, but that shouldn't be an issue.
Here are my results:
As you can see, it seems the best bandwidth I can get is a bit more than 300 MB/s... But if I run a bandwidth test with iperf (I made sure to use the same TCP window size), on localhost I get a bandwidth of about 3 GB/s.
Here is the code of my client:
int main(int argc, char const *argv[])
{
unsigned int size;
unsigned int timeout;
int sockfd;
struct sockaddr_in server_addr;
if(argc < 4 || argc > 5 ){
usage();
exit(1);
}
const char * ip = "127.0.0.1";
ip = argv[3];
int port = PORT;
if(argc == 5) {
port = atoi(argv[4]);
}
size = atoi(argv[1]);
timeout = atoi(argv[2]);
unsigned int count = 0;
struct timespec start, end;
clock_gettime(CLOCK_MONOTONIC_RAW, &start);
clock_gettime(CLOCK_MONOTONIC_RAW, &end);
while((end.tv_sec - start.tv_sec) < timeout) {
// socket create and varification
sockfd = socket(AF_INET, SOCK_STREAM, 0);
if (sockfd == -1) {
perror("Could not create socket\n");
exit(0);
}
bzero(&server_addr, sizeof(server_addr));
// assign IP, PORT of the server
server_addr.sin_family = AF_INET;
server_addr.sin_addr.s_addr = inet_addr(ip);
server_addr.sin_port = htons(port);
// connect the client socket to server socket
if (connect(sockfd, (struct sockaddr *)&server_addr, sizeof(server_addr)) != 0) {
perror("connection with the server failed");
exit(1);
}
unsigned int nread = 0;
unsigned int nreadcum = 0;
char* buf = malloc(size);
char* bufbuf = buf;
while(nreadcum < size){
nread = read(sockfd, bufbuf, size-nreadcum);
nreadcum += nread;
bufbuf+=nread;
}
// close connection
close(sockfd);
count++;
clock_gettime(CLOCK_MONOTONIC_RAW, &end);
free(buf);
}
uint64_t sec = (end.tv_sec - start.tv_sec);
double bandwidth = (count*size)/sec;
printf("%u,%lf,%u,%lu\n", size, bandwidth, count, sec);
return 0;
}
And here is the code of my server:
int serv_sock_fd;
int main(int argc, char const *argv[])
{
int size;
struct sockaddr_in serv_addr;
struct sockaddr_in client_addr;
int bound_port;
if(argc != 2){
usage();
exit(1);
}
size = atoi(argv[1]);
int serv_sock_fd = socket(AF_INET,SOCK_STREAM,0);
int true = 1;
setsockopt(serv_sock_fd,SOL_SOCKET,SO_REUSEADDR,&true,sizeof(int));
if(serv_sock_fd == -1) {
perror("Failed to open server socket");
exit(1);
}
bzero(&serv_addr, sizeof(serv_addr));
serv_addr.sin_family = AF_INET;
serv_addr.sin_addr.s_addr = INADDR_ANY;
serv_addr.sin_port = htons(PORT);
// Bind socket to the chosen port
if ((bound_port = bind(serv_sock_fd, (struct sockaddr *) &serv_addr, sizeof(serv_addr))) <0){
perror("Could not bind socket to local port");
exit(1);
}
// Listen on the port
if (listen(serv_sock_fd, 16))
{
perror("Could not listen");
exit(1);
}
signal(SIGINT, sigint_handler);
printf("Waiting for connection on %d ...\n", PORT);
int returned = 1;
while(returned) {
printf(".\n");
int new_socket_fd;
unsigned int client_addr_len = sizeof(client_addr);
if ((new_socket_fd = accept(serv_sock_fd, (struct sockaddr *)&client_addr,
&client_addr_len))<0) {
perror("Could not accept client connection");
exit(1);
}
printf("connection received, start sending ... ");
char * payload = sequence_payload(size);
returned = write(new_socket_fd, payload, size);
printf("finished sending\n");
printf("Returned value = %d\n", returned);
close(new_socket_fd);
free(payload);
}
close(serv_sock_fd);
return 0;
}
char * sequence_payload(int size) {
char * payload = malloc(size);
for (int i = 0; i < size; i++)
{
payload[i] = i%256;
}
return payload;
}
Basically what my code is doing is:
To calculate the bandwidth, I can just do (number_of_connections_completed * size_transferred_by_connection) / duration_of_all_transfers
. I use python to calculate the bandwidth, to be free of any overflow in C.
TLDR: The bandwidth I get with my C programs is 10 times less than what it should be on localhost. What could be the source of that problem?
malloc
and free
are the main issue here. Since they are system call they take a significant amount of time, and since I am measuring the performance of TCP and not those of memory allocation, malloc
and free
should be outside my profiled loop. Also, the same thing applies to printf
in the server-side loop, while not as bad as malloc
, the time it takes to print something on the screen is not something that should be taken into account when measuring the performance of TCP.