Please note, I already know about the streaming nature of TCP connections; my question is not related to those kinds of things. It is rather about my suspicion that there is a bug in the Linux sockets implementation.
Update: Taking the comments into account, I updated my code a little bit to check the return value of recv()
not only against -1 but against any negative value. That was just in case. The results are the same.
I have a very simple TCP client/server application written in C. The full code of this project is available on GitHub.
The client side runs multiple parallel threads, and each thread does the following:
/*
 * Send exactly `len` bytes from `buff` over the socket `fd`.
 *
 * by_frags == true:  the payload is deliberately offered to send() in
 *                    randomly sized pieces (1 .. remaining bytes each), to
 *                    exercise the receiver's reassembly of a fragmented
 *                    stream.
 * by_frags == false: the whole remainder is offered to send() at once.
 *
 * In both modes a short send() (fewer bytes accepted than requested) is not
 * an error: the loop simply continues from the first unsent byte.
 *
 * Returns `len` once every byte has been accepted by send(), or
 * (size_t) -1 if send() fails or makes no progress.
 */
static size_t send_ex(int fd, const uint8_t *buff, size_t len, bool by_frags)
{
    size_t pos = 0;

    while ( pos < len )
    {
        size_t chunk_len = len - pos;
        ssize_t res;

        if ( by_frags ) {
            /* Random fragment length in the range [1, remaining]. */
            chunk_len = ((size_t) random() % chunk_len) + 1;
        }

        /* send() returns ssize_t; keep it signed so -1 is detected as such. */
        res = send(fd, &buff[pos], chunk_len, 0);
        if ( res <= 0 ) {
            /* res < 0: error; res == 0: no progress, bail out rather than spin. */
            return (size_t) -1;
        }

        /* Advance by what was actually accepted, which may be < chunk_len. */
        pos += (size_t) res;
    }

    return len;
}
/*
 * Client worker thread.
 *
 * `arg` points to a connection_ctx_t. The thread repeats up to `count`
 * times: create a TCP socket, connect to ctx->serveraddr, send one 16-byte
 * block (marker word 0xAA55AA55 followed by three filler words) through
 * send_ex(), close the socket, then sleep via msleep(delay).
 *
 * `count`, `frags` and `delay` are defined outside this function
 * (presumably the -c, -f and -d command-line options -- confirm).
 *
 * ctx->sent_packs is incremented once per block that was sent completely.
 * The loop stops at the first socket/connect/send failure; a failing close()
 * is only reported. Always returns NULL.
 */
static void *connection_task(void *arg)
{
    connection_ctx_t *ctx = (connection_ctx_t *) arg;
    uint32_t buff[4] = {0xAA55AA55, 0x12345678, 0x12345678, 0x12345678};
    int i;

    for ( i = 0; i < count; i++ )
    {
        size_t sent;
        int fd;

        fd = socket(AF_INET, SOCK_STREAM, 0);
        if ( fd < 0 ) {
            fprintf(stderr, "Can't create socket!\n");
            break;
        }

        if ( connect(fd, (struct sockaddr *) ctx->serveraddr, sizeof(struct sockaddr_in)) < 0 ) {
            fprintf(stderr, "Connect failed!\n");
            close(fd);
            break;
        }

        /*
         * send_ex() takes a `const uint8_t *` and returns size_t, so cast to
         * the matching pointer type and keep the result unsigned instead of
         * squeezing it through an int.
         */
        sent = send_ex(fd, (const uint8_t *) buff, sizeof(buff), frags);
        if ( sent != sizeof(buff) ) {
            fprintf(stderr, "Send failed!\n");
            close(fd);
            break;
        }
        ctx->sent_packs++;

        if ( close(fd) < 0 ) {
            fprintf(stderr, "CLI: Close Failed!!\n");
        }

        msleep(delay);
    }

    return NULL;
}
The server side runs a thread for each incoming connection, which does the following:
/*
 * Per-connection state passed to client_task() as its thread argument.
 * client_task() free()s the object before it returns.
 */
typedef struct client_ctx_s {
/* IPv4 address and port printed as the prefix of client_task()'s log lines
 * (presumably the peer address obtained when the connection was accepted --
 * confirm in the accept loop). */
struct sockaddr_in addr;
/* Socket descriptor that client_task() reads from with recv() and closes on exit. */
int fd;
} client_ctx_t;
/*
 * Server worker thread for one connection.
 *
 * `arg` points to a client_ctx_t; this function owns it, closing client->fd
 * and free()ing the object before returning.
 *
 * Reads exactly sizeof(buff) (16) bytes from the socket, tolerating any
 * fragmentation of the stream, and prints the four received words to stdout.
 * Diagnostics go to stderr when:
 *   - recv() fails (with errno and the current offset),
 *   - the peer closes the connection after sending only part of the block,
 *   - the first word is not the expected marker 0xAA55AA55.
 *
 * Always returns NULL.
 */
void *client_task(void *arg)
{
    client_ctx_t *client = (client_ctx_t *) arg;
    /*
     * inet_ntoa() returns a pointer to a shared static buffer, which is unsafe
     * when many client threads log at the same time. Format the peer address
     * once into thread-local storage instead.
     */
    char peer[INET_ADDRSTRLEN];
    unsigned port = ntohs(client->addr.sin_port);
    uint32_t buff[4] = {0};
    size_t free_space, pos = 0;
    ssize_t chunk_len;
    int corruption_reported = 0;
    int res;

    if ( inet_ntop(AF_INET, &client->addr.sin_addr, peer, sizeof(peer)) == NULL ) {
        peer[0] = '?';
        peer[1] = '\0';
    }

    while ( pos != sizeof(buff) )
    {
        free_space = sizeof(buff) - pos;
        assert(pos < sizeof(buff));

        chunk_len = recv(client->fd, &((uint8_t *) buff)[pos], free_space, 0);
        if ( chunk_len < 0 && errno == EINTR ) {
            /* Interrupted before any data arrived: not a failure, retry. */
            continue;
        }
        if ( chunk_len <= 0 ) {
            if ( chunk_len < 0 ) {
                fprintf(stderr, "%s:%u: ERROR: recv failed (errno = %d; pos = %zu)!\n",
                        peer, port, errno, pos);
            }
            else if ( pos && pos < sizeof(buff) ) {
                /* Orderly shutdown by the peer in the middle of a block. */
                fprintf(stderr, "%s:%u: ERROR: incomplete data block (pos = %zu)!\n",
                        peer, port, pos);
            }
            goto out;
        }

        assert((size_t) chunk_len <= free_space);
        pos += (size_t) chunk_len;

        /*
         * Check the marker as soon as the first word is complete. Report a
         * mismatch only once per connection rather than after every
         * subsequent chunk.
         */
        if ( !corruption_reported && pos >= sizeof(buff[0]) && buff[0] != 0xAA55AA55 ) {
            fprintf(stderr, "%s:%u: ERROR: data corrupted (%08x)!\n",
                    peer, port, (unsigned) buff[0]);
            corruption_reported = 1;
        }
    }

    fprintf(stdout, "%s:%u: %08x %08x %08x %08x\n",
            peer, port,
            (unsigned) buff[0], (unsigned) buff[1],
            (unsigned) buff[2], (unsigned) buff[3]);

out:
    debug("Connection closed\n");
    res = close(client->fd);
    assert(res == 0);
    free(client);
    return NULL;
}
The following issues came up when a client runs one thousand sending threads, each of which repeats connect-send-disconnect one hundred times (./client -t 1000 -c 100 -d 0 -f):
This behavior is repeatable both on local host and over a real network connection.
Examining the TCP flow of the corrupted data with Wireshark shows that:
I can't really believe this problem lies in the Linux TCP/IP implementation. Can anybody explain what is wrong with my code?
At first glance, there is a similar problem described here: https://wpbolt.com/syn-cookies-ate-my-dog-breaking-tcp-on-linux/
However, in our case Wireshark shows an ACK for every data packet, so it still looks like a kernel bug.
To reproduce this error, it is not necessary to open a large number of TCP connections. 10 is enough.
This can be schematically reproduced as follows:
run server
...
listenfd = socket(...
res = bind(listenfd, ...
res = listen(listenfd, 1); !!! backlog set 1
wait for a user key press (needed so that the client sockets have time to be added to the backlog queue)
start client
run 10 thread with:
fd = socket(...
z = setsockopt(fd, SOL_TCP, TCP_NODELAY, &one, sizeof(one));
connect(fd ...
for(int i=0;i<28;i++)
send(fd, &buff[i], 1, 0);
recv()
9 TCP streams enter the backlog queue on the server side and begin re-sending SYN at increasing intervals.
On the server side, press Enter to unblock the server, which then runs:
while(1)
select([listenfd, socketN])
listenfd: new connection
accept(...)
add to socketN
socketN: new data
recv()
As a result, the first bytes of data in several TCP connections will be lost. This behavior is observed on Ubuntu 24.04 with kernel 6.10.2.