Search code examples
clinuxsocketsunix-socketlibev

UNIX socket read happening even though client does not send data


I am facing a very strange issue, I have a simple UNIX STREAM socket server/client code running on Linux. client occasionally sends a message to server (I tested with sending only once also), however after receiving the first message, the server keeps on printing the same message even though client hasn't sent any message.

The entire message is created afresh, there is no static data etc. which could cause same message persisting over multiple calls.

Client side code:

int g_fd = -1;
#define SERVER_SOCK   "/tmp/server_sock"

int init_fd(void) {
    g_fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (g_fd < 0) {
        log_info("alloc", "socket() failed with error (%d:%s)", errno, strerror(errno));
        return -1;
    }

    struct sockaddr_un sa;
    memset(&sa, 0, sizeof(sa));
    sa.sun_family = AF_UNIX;
    snprintf(sa.sun_path, sizeof(sa.sun_path), SERVER_SOCK);

    if (connect(g_fd, (struct sockaddr *) &sa, strlen(sa.sun_path) + sizeof(sa.sun_family)) < 0) {
        log_info("alloc", "connect() failed with error (%d:%s)", errno, strerror(errno));
        return -1;
    }
    int flags = fcntl(g_fd, F_GETFL, 0);
    fcntl(g_fd, F_SETFL, flags | O_NONBLOCK);
    return 0;
}

void send_event(void) {
    if (g_fd < 0) {
        if (init_fd() < 0) {
            log_info("alloc", "failed to connect to server");
            return;
        }
    }
    json_t *jtc = json_object();
    json_object_set_new(jtc, "msgType", json_integer(650));
    json_t *jtype = json_object();
    json_object_set_new(jtype, "type", json_string("MESSAGE_CHANGE"));
    json_object_set_new(jtc, "data", jtype);
    char *j_dump_string = NULL;
    j_dump_string = json_dumps(jtc, JSON_PRESERVE_ORDER);
    if (write_a_msg(g_fd, (uint8_t*)j_dump_string, strlen(j_dump_string)+1) == -1) {
        close(g_fd);
        g_fd = -1;
        log_info("alloc", "failed to send message to server");
    }
    log_info("alloc", "GNA: da_send: %s", j_dump_string);
    free(j_dump_string);
    json_decref(jtc);
}

int write_a_msg(int fd, const uint8_t *ptr, size_t nbytes) {
    uint8_t *write_buf = malloc(nbytes + MSG_LEN_SIZE);
    if (!write_buf)
        return -1;
    write_buf[0] = (nbytes >> 24);
    write_buf[1] = (nbytes >> 16);
    write_buf[2] = (nbytes >> 8);
    write_buf[3] = (nbytes);
    memcpy(write_buf + MSG_LEN_SIZE, ptr, nbytes);
    if (write_loop(fd, write_buf, nbytes + MSG_LEN_SIZE) < 0) {
        int save_err = errno;
        free(write_buf);
        errno = save_err;
        return -1;
    }
    free(write_buf);
    return 0;
}

static int write_loop(int fd, const uint8_t *ptr, size_t nbytes) {
  ssize_t nleft, nwritten;
  nleft = nbytes;
  while (nleft) {
        nwritten = write(fd, ptr, nleft);
        if (nwritten <= 0) {
            if (errno != EAGAIN) return -1;
            continue;
        }
        nleft -= nwritten;
        ptr += nwritten;
    }
    return 0;
}

Client side uses libev for handling the sockets, Server uses select:

static int server_init(void) {
    struct sockaddr_un sa;
    memset(&sa, 0, sizeof(sa));
    int fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) {
        DebugLog(ERROR, "Could not create socket - error (%d:%s)", errno, strerror(errno));
        return -1;
    }
    unlink(SERVER_SOCK);
    sa.sun_family = AF_UNIX;
    strcpy(sa.sun_path, SERVER_SOCK);
    if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) {
        DebugLog(ERROR, "Bind failed with error (%d:%s)\n", errno, strerror(errno));
        close(fd);
        return -1;
    }
    if (listen(fd, 5)) {
        DebugLog(ERROR, "Listen failed with error (%d:%s)\n", errno, strerror(errno));
        close(fd);
        return -1;
    }
    DebugLog(INFO, "GNA: Create server ready to accept\n");
    ev_add_fd(fd, EV_READ, connection_callback, NULL);
    return RET_OK;
}

static void connection_callback(int fd, int flags, void *data) {
    struct sockaddr_un remote;
    socklen_t rlen = sizeof(struct sockaddr_un);

    int connection_fd = accept(fd, (struct sockaddr *) &remote, &rlen);
    if (connection_fd < 0) {
        DebugLog(ERROR, "%s:Accept failed", __func__);
        return;
    }
    DebugLog(INFO, "GNA: received new connection\n");
    ev_add_fd(connection_fd, EV_READ, request_callback, NULL);
}

static void request_callback(int fd, int flags, void *data) {
    char *msg = NULL;
    size_t msglen = 0;
    if (read_a_msg(fd, (uint8_t **) & msg, &msglen) < 0) {
        DebugLog(ERROR, "%s:read failed (pid:%d). error %d:%s", __func__, getpid(), errno,
                 strerror(errno));
        ev_del_fd(fd);
        close(fd);              // close the FD we'll reopen a new one next time
        if (msg)
            free(msg);
        return;
    }
    msg[msglen] = '\0';         // ensure NULL termination
    handle_msg(msg, msglen, fd);
    if (msg)
        free(msg);
    return;
}

static void handle_msg(char *msg, int msglen, int fd) {
    DebugLog(INFO, "%s: GNA: Received msg: %s", __func__, msg);
}

int read_a_msg(int fd, uint8_t **ptr, size_t *nbytes) {
    uint8_t hd[4];
    if (read_loop(fd, hd, 4) < 0)
        return -1;
    size_t payload_len = (hd[0]<<24)|(hd[1]<<16)|(hd[2]<<8)|(hd[3]);
    (*ptr) = malloc(payload_len + 1); // allocate one extra byte in case the user is reading strings and wants to add a NULL in the end. the length wont include this, its essential padding for convenience.
    *nbytes = payload_len;
    if (read_loop(fd, *ptr, *nbytes) < 0)
        return -1;
    return 0;
}

static int read_loop(int fd, uint8_t *ptr, size_t nbytes) {
    ssize_t nleft, nread;
    nleft = nbytes;
    while (nleft) {
        nread = read(fd, ptr, nleft);
        if (nread < 0) {
            if (errno != EAGAIN) return -1;
            continue;
        }
        if (nread == 0)
            break;
        nleft -= nread;
        ptr += nread;
    }
    if (nleft != 0) return -1;
    return 0;
}

THis log keeps printing every few seconds, although no one is sending anything.

DebugLog(INFO, "%s: GNA: Received msg: %s", __func__, msg);

On the client side, there is only one send function which no one else is calling and this log comes exactly once.

log_info("alloc", "GNA: da_send: %s", j_dump_string);

Can anyone suggest why is this happening. and how to avoid ?

Update 1: After suggestions for strace on client and server I found that server is setting fd in select and getting a read read event periodically

08:19:26.031031 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=16, tv_usec=1049544}) = 1 (in [11], left {tv_sec=17, tv_usec=44972})
08:19:26.035737 read(11, "\0\0\0005", 4) = 4
08:19:26.035814 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:38.982049 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=3, tv_usec=1000564}) = 1 (in [11], left {tv_sec=3, tv_usec=946001})
08:19:39.036745 read(11, "\0\0\0005", 4) = 4
08:19:39.036816 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:50.117012 _newselect(18, [4 5 6 7 8 9 10 11 15 16 17], [], NULL, {tv_sec=4, tv_usec=1276752}) = 1 (in [17], left {tv_sec=5, tv_usec=276586})
08:19:50.117251 read(17, "\0\0\0005", 4) = 4
08:19:50.117308 read(17, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:51.910855 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=2, tv_usec=1070595}) = 1 (in [11], left {tv_sec=2, tv_usec=943831})
08:19:52.037758 read(11, "\0\0\0005", 4) = 4
08:19:52.037841 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:20:05.031834 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=0, tv_usec=1000335}) = 1 (in [11], left {tv_sec=0, tv_usec=993545})
08:20:05.038758 read(11, "\0\0\0005", 4) = 4
08:20:05.038838 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53

I killed the client and attached strace at the begining so that it can get the fd of the socket, client is not writing anything on the fd 14 except for once:

08:10:02.362615 socket(AF_UNIX, SOCK_STREAM, 0) = 14
08:10:02.363091 connect(14, {sa_family=AF_UNIX, sun_path="/tmp/server_sock"}, 21) = 0
08:10:02.363251 fcntl64(14, F_GETFL)    = 0x2 (flags O_RDWR)
08:10:02.363297 fcntl64(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0
08:10:02.363837 write(14, "\0\0\0005{\"msgType\": 650, \"data\": {\"t"..., 57) = 57

Thats the only write for fd 14.

Update 2: The above send_event function gets compiled as part of libabc.so library. After putting getpid() and gettimeofday() in my message, I found out that another daemon was using this library and calling a wrapper function which is calling the send_event function, which is why I see server periodically receiving message, even though client is not sending any message. I am now creating a .pid file that stores the pid of the client, then in the send_event I compare current process pid with pid of client.


Solution

  • The above send_event function gets compiled as part of libabc.so library. After putting getpid() and gettimeofday() in my message, I found out that another daemon was using this library and calling a wrapper function which is calling the send_event function, which is why I see server periodically receiving message, even though client is not sending any message. I am now creating a .pid file that stores the pid of the client, then in the send_event I compare current process pid with pid of client.

    Thanks @pts for the suggestions.