Search code examples
ceofgetline

C getline() returns -1 before EOF


I'm using the getline() function in a C program to read lines from a file, place the read info into a struct, create a thread with that struct as an argument, and repeat. However, getline() returns -1 before EOF, after the second set of arguments (under "id: test2") in the file provided.
Since print errno in gdb returns 0, I assume there's no error and getline() somehow seems to believe it has reached EOF.
The create thread portion has been commented out because it's unrelated to the problem at hand.

The code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <pthread.h>
#include <sys/time.h>

#define BUFFER_SIZE 256

// Not referenced anywhere in the code shown here.
time_t cur;
// Default key path; main() replaces it with a strdup() copy of the value
// when a "keypath: " line is parsed.
// NOTE(review): the default has no leading '/', so it is a relative path -
// confirm that is intended.
char* keypath = "home/chy/.ssh/id_rsa";
// Default, overwritten by main() from a "logfreq: " config line.
int logfreq = 1;
// Default, overwritten by main() from a "hashfreq: " config line.
int hashfreq = 180;

// One record read from the config file. main() allocates one of these per
// "id: " line and fills the string members with strdup() copies of the values.
// NOTE(review): uint16_t comes from <stdint.h>, which is not included directly
// above; presumably it arrives through one of the network headers - confirm.
struct getArgs
{
  const char* id;        // value of the "id: " line
  const char* hostaddr;  // value of the "hostaddr: " line
  uint16_t port;         // value of the "port: " line, assigned from an int
  const char* uname;     // value of the "username: " line
  const char* path;      // value of the "path: " line
};

// Reads config.txt line by line. Global settings (keypath, logfreq, hashfreq)
// are applied as they are met; each "id: " line starts a record that must be
// followed, in order, by hostaddr, port, username and path lines. A completed
// record would be handed to a thread (creation is commented out) and the
// thread handle is stored in the growable array ths.
int main(int argc, char **argv)
{
  int th_max = 5;
  int th_count = 0;
  // Room for th_max thread handles; the malloc result is not checked.
  pthread_t* ths = (pthread_t*) malloc(th_max * sizeof(pthread_t));

  FILE* fp;
  // NOTE(review): the fopen result is not checked before fp is used.
  fp = fopen("config.txt", "r");

  char* line = NULL;
  char buf[128];
  int r = 0;
  size_t len = 0;
  ssize_t read;

  system("mkdir logs && chmod -R a+r logs");

  while(1)
  {
    // Never assigned in the code shown: the pthread_create call below is
    // commented out, so the value stored into ths is indeterminate.
    pthread_t th;
//Get ID
    // NOTE(review): `l` is not declared anywhere in this function; every other
    // getline call passes &len.
    while((read = getline(&line, &l, fp)) != -1)
    {
      if(!(read > 0))
        continue;
      // An "id: " line ends the scan for global settings and starts a record.
      if(sscanf(line, "id: %[^\n]\n", buf) == 1)
        break;
      if(sscanf(line, "keypath: %[^\n]\n", buf) == 1)
        keypath = strdup(buf);
      else if(sscanf(line, "logfreq: %d\n", &r) == 1)
        logfreq = r;
      else if(sscanf(line, "hashfreq: %d\n", &r) == 1)
        hashfreq = r;
    }
    // getline reported -1 (end of file or error) before another id was found.
    if(read == -1)
      break;
    struct getArgs* args = (struct getArgs*)malloc(sizeof(struct getArgs));
    args->id = strdup(buf);
//Get Host Address
    if(getline(&line, &len, fp) == -1)
    {
      // NOTE(review): free_args is not defined in the code shown.
      free_args(args);
      break;
    }
    // A line that is not the expected tag abandons this record and returns to
    // the id scan at the top of the outer loop.
    if(sscanf(line, "hostaddr: %[^\n]\n", buf) != 1)
    {
      free_args(args);
      continue;
    }
    args->hostaddr = strdup(buf);
//Get Port Number
    if(getline(&line, &len, fp) == -1)
    {
      free_args(args);
      break;
    }
    if(sscanf(line, "port: %d\n", &r) != 1)
    {
      free_args(args);
      continue;
    }
    // int narrowed to uint16_t; the range is not checked.
    args->port = r;
//Get Username
    if(getline(&line, &len, fp) == -1)
    {
      free_args(args);
      break;
    }
    if(sscanf(line, "username: %[^\n]\n", buf) != 1)
    {
      free_args(args);
      continue;
    }
args->uname = strdup(buf);
//Get Path
    if(getline(&line, &len, fp) == -1)
    {
      free_args(args);
      break;
    }
    if(sscanf(line, "path: %[^\n]\n", buf) != 1)
    {
      free_args(args);
      continue;
    }
    args->path = strdup(buf);

//    int err = pthread_create(&th, NULL, &getFiles, args);

    // NOTE(review): growth happens only once th_count exceeds th_max, so the
    // store made when th_count == th_max takes place before the array grows.
    if(th_count > th_max)
    {
      th_max *= 2;
      ths = (pthread_t*)realloc(ths, th_max * sizeof(pthread_t));
    }
    // NOTE(review): pointer arithmetic on ths is already in units of
    // pthread_t, so this stores to ths[th_count * sizeof(pthread_t)], not
    // ths[th_count]. For th_count >= 1 that element lies outside the
    // th_max-element allocation whenever sizeof(pthread_t) >= th_max.
    *(ths+th_count * sizeof(pthread_t)) = th;
    th_count++;
  }

  fclose(fp);

  // Spins forever; the function never returns.
  while(1);
}

The file:

keypath: /home/username/.ssh/id_rsaNOPASSWORD
id: test1
hostaddr: XXX.XXX.XXX.XXX
port: 22
username: hpc
path: /home/hpc/
id: test2
hostaddr: XXX.XXX.XXX.XXX
port: 22
username: hpc
path: /home/hpc/
id: test3
hostaddr: XXX.XXX.XXX.XXX
port: 22
username: hpc
path: /home/hpc/
id: test4
hostaddr: XXX.XXX.XXX.XXX
port: 3844
username: uname
path: /home/uname/hpc/

The contents of file pointer before the last getline():

{_flags = -72539000,
  _IO_read_ptr = 0x7ffff7ff80d0 "    \nid: test3\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /home/hpc/\n    \nid: test4\nhostaddr: XXX.XXX.XXX.XXX\nport: 3844\nusername: uname\npath: /home/uname/hpc/\n\n",
  _IO_read_end = 0x401d10 "H\211l$\330L\211d$\340H\215-\277\006 ",
  _IO_read_base = 0x7ffff7ff8000 "keypath: /home/username/.ssh/id_rsaNOPASSWORD\n    \nid: test1\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /home/hpc/\n    \nid: test2\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /ho"...,
  _IO_write_base = 0x7ffff7ff8000 "keypath: /home/username/.ssh/id_rsaNOPASSWORD\n    \nid: test1\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /home/hpc/\n    \nid: test2\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /ho"...,
  _IO_write_ptr = 0x7ffff7ff8000 "keypath: /home/username/.ssh/id_rsaNOPASSWORD\n    \nid: test1\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /home/hpc/\n    \nid: test2\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /ho"...,
  _IO_write_end = 0x7ffff7ff8000 "keypath: /home/username/.ssh/id_rsaNOPASSWORD\n    \nid: test1\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /home/hpc/\n    \nid: test2\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /ho"...,
  _IO_buf_base = 0x7ffff7ff8000 "keypath: /home/username/.ssh/id_rsaNOPASSWORD\n    \nid: test1\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /home/hpc/\n    \nid: test2\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /ho"..., _IO_buf_end = 0x7ffff7ff9000 "P\220\377\367\377\177", _IO_save_base = 0x0, _IO_backup_base = 0x0, _IO_save_end = 0x0, _markers = 0x0, _chain = 0x7ffff7bbb880, _fileno = 7, _flags2 = 0, _old_offset = 0, _cur_column = 0,
  _vtable_offset = 0 '\000', _shortbuf = "", _lock = 0x603120, _offset = -1, __pad1 = 0x0, __pad2 = 0x603130, __pad3 = 0x0, __pad4 = 0x0, __pad5 = 0, _mode = -1, _unused2 = '\000' <repeats 19 times>}

After:

{_flags = -72538984,
  _IO_read_ptr = 0x7ffff7ff8000 "keypath: /home/username/.ssh/id_rsaNOPASSWORD\n    \nid: test1\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /home/hpc/\n    \nid: test2\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /ho"...,
  _IO_read_end = 0x7ffff7ff8000 "keypath: /home/username/.ssh/id_rsaNOPASSWORD\n    \nid: test1\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /home/hpc/\n    \nid: test2\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /ho"...,
  _IO_read_base = 0x7ffff7ff8000 "keypath: /home/username/.ssh/id_rsaNOPASSWORD\n    \nid: test1\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /home/hpc/\n    \nid: test2\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /ho"...,
  _IO_write_base = 0x7ffff7ff8000 "keypath: /home/username/.ssh/id_rsaNOPASSWORD\n    \nid: test1\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /home/hpc/\n    \nid: test2\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /ho"...,
  _IO_write_ptr = 0x7ffff7ff8000 "keypath: /home/username/.ssh/id_rsaNOPASSWORD\n    \nid: test1\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /home/hpc/\n    \nid: test2\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /ho"...,
  _IO_write_end = 0x7ffff7ff8000 "keypath: /home/username/.ssh/id_rsaNOPASSWORD\n    \nid: test1\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /home/hpc/\n    \nid: test2\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /ho"...,
  _IO_buf_base = 0x7ffff7ff8000 "keypath: /home/username/.ssh/id_rsaNOPASSWORD\n    \nid: test1\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /home/hpc/\n    \nid: test2\nhostaddr: XXX.XXX.XXX.XXX\nport: 22\nusername: hpc\npath: /ho"..., _IO_buf_end = 0x7ffff7ff9000 "P\220\377\367\377\177", _IO_save_base = 0x0, _IO_backup_base = 0x0, _IO_save_end = 0x0, _markers = 0x0, _chain = 0x7ffff7bbb880, _fileno = 7, _flags2 = 0, _old_offset = 0, _cur_column = 0,
  _vtable_offset = 0 '\000', _shortbuf = "", _lock = 0x603120, _offset = -1, __pad1 = 0x0, __pad2 = 0x603130, __pad3 = 0x0, __pad4 = 0x0, __pad5 = 0, _mode = -1, _unused2 = '\000' <repeats 19 times>}

I also attempted to implement the same with fgets() instead of getline(), but with the same results.
What might be causing this problem?

Edit:
I have run the code over the same file, except without the excess spaces/blank lines, and I also tried switching up the order to see if anything is affected, but the results are the same.
I can also confirm that the structure for test1 and test2 are properly populated.

{id = 0x6032e0 "test1", hostaddr = 0x603300 "XXX.XXX.XXX.XXX", port = 22, uname = 0x603320 "hpc", path = 0x603340 "/home/hpc/"}
{id = 0x603390 "test2", hostaddr = 0x6033b0 "XXX.XXX.XXX.XXX", port = 22, uname = 0x6033d0 "hpc", path = 0x6033f0 "/home/hpc/"}

edits: version change to code and file


Solution

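  • In addition to the unsigned/signed type mismatches discussed in the comments, the primary issue you were having was the conflicting logic used in the tests, and multiple, loosely organized calls to getline and sscanf where you would test if ((read = getline ... || sscanf ...) where either getline or sscanf (on input or matching failure) could return EOF (-1) with no way of telling what the problem was. To clean the logic up, you want to approach reading the config file where you (1) read the line - one time, (2) parse tag and value, then (3) test tag and value and take the needed actions. Note also that the statement *(ths+th_count * sizeof(pthread_t)) = th; in the posted code scales the index twice (pointer arithmetic already counts in units of pthread_t), so from the second record onward it writes outside the ths allocation. A heap overwrite of that kind can corrupt the FILE object that fopen allocated afterwards, which is consistent with the garbage _IO_read_end value (0x401d10) in the "before" dump and with getline() then reporting end of file; index the array as ths[th_count] instead.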

    Sticking to that approach, and ignoring the thread code not relevant to your read issue, I debugged/re-wrote your input routine to give you an example of one way to approach the read and separation in a sane manner. In doing so, I collected all settings in an array of pointers to struct (e.g. struct getArgs **args;). You may not need to collect each in an array of structs with your pthread scheme, but for purposes of example, that change was made.

    I have also included a second example below that shows an additional approach with a bit more checking to ensure your values are read in id, hostaddr, port, username, and path groups. The first simple example will handle skipping blank lines in the input file:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <stdint.h>
    
    #define NARGS  32
    #define TSIZE  16
    #define VSIZE 256
    
    char *keypath = "home/chy/.ssh/id_rsa";
    int logfreq = 1;
    int hashfreq = 180;
    
    /* One id/hostaddr/port/username/path group from the config file.
     * main() allocates a zero-filled instance when it reads an "id:" line and
     * stores heap-allocated copies of the values in the string members. */
    struct getArgs {
        const char *id;         /* value of the "id:" line */
        const char *hostaddr;   /* value of the "hostaddr:" line */
        uint16_t port;          /* value of the "port:" line, converted with strtoul */
        const char *uname;      /* value of the "username:" line */
        const char *path;       /* value of the "path:" line; completes the group */
    };
    
    void *xcalloc (size_t n, size_t s);
    void *xrealloc_dp (void *ptr, size_t *n);
    
    /* Duplicate s with strdup(); report and exit on allocation failure. */
    static char *dup_or_die (const char *s)
    {
        char *copy = strdup (s);
        if (!copy) {
            fprintf (stderr, "strdup() error: virtual memory exhausted.\n");
            exit (EXIT_FAILURE);
        }
        return copy;
    }

    /* Store a fresh copy of val in *slot, releasing any copy already held
     * there so that a repeated tag inside one record does not leak. */
    static void set_field (const char **slot, const char *val)
    {
        free ((void *)*slot);
        *slot = dup_or_die (val);
    }

    /* Release one record together with the strings it owns; NULL is accepted. */
    static void free_record (struct getArgs *rec)
    {
        if (!rec)
            return;
        /* the members are const-qualified for readers but were heap-allocated */
        free ((void *)rec->id);
        free ((void *)rec->hostaddr);
        free ((void *)rec->uname);
        free ((void *)rec->path);
        free (rec);
    }

    /* Read the config file named by argv[1] (default "config.txt"), apply the
     * keypath/logfreq/hashfreq settings, collect every complete
     * id/hostaddr/port/username/path group, print what was read and release
     * all storage.
     *
     * returns 0 on success, 1 if the file cannot be opened; allocation
     * failures terminate the program.
     */
    int main (int argc, char **argv)
    {
        struct getArgs **args = NULL;
        struct getArgs *cur = NULL;     /* record being filled; NULL between records */
        char *owned_keypath = NULL;     /* heap copy behind keypath, if any */
        char *line = NULL;
        char fmt[32];
        size_t len = 0;
        size_t idx = 0;
        size_t nargs = NARGS;
        size_t i;
        ssize_t read;
        FILE *fp = argc > 1 ? fopen (argv[1], "r") : fopen ("config.txt", "r");

        if (!fp) { fprintf (stderr, "file open failed.\n"); return 1; }

        /* Build "%<TSIZE-1>s %<VSIZE-1>[^\n]%*c" so neither conversion can
         * write past the end of tag[] or val[]. */
        snprintf (fmt, sizeof fmt, "%%%ds %%%d[^\n]%%*c", TSIZE - 1, VSIZE - 1);

        /* allocate NARGS pointer to struct getArgs */
        args = xcalloc (NARGS, sizeof *args);

        /* read each line in file */
        while ((read = getline (&line, &len, fp)) != -1) {

            char tag[TSIZE] = {0};
            char val[VSIZE] = {0};

            /* skip blank lines, including lines holding only whitespace */
            if (line[strspn (line, " \t\r\n")] == '\0') continue;

            /* separate tag and value (val) */
            if (sscanf (line, fmt, tag, val) != 2) {
                fprintf (stderr, "error: sscanf conversion failed.\n");
                break;
            }

            /* handle keypath, logfreq, hashfreq */
            if (strcmp (tag, "keypath:") == 0) {
                free (owned_keypath);           /* drop an earlier setting */
                owned_keypath = dup_or_die (val);
                keypath = owned_keypath;
                continue;
            }

            if (strcmp (tag, "logfreq:") == 0) {
                logfreq = (int)strtol (val, NULL, 10);
                continue;
            }

            if (strcmp (tag, "hashfreq:") == 0) {
                hashfreq = (int)strtol (val, NULL, 10);
                continue;
            }

            /* "id:" opens a new record; one left open without a path is dropped */
            if (strcmp (tag, "id:") == 0) {
                if (cur) {
                    fprintf (stderr, "warning: record '%s' has no path, discarded.\n",
                             cur->id);
                    free_record (cur);
                }
                cur = xcalloc (1, sizeof *cur);
                set_field (&cur->id, val);
                continue;
            }

            /* tags that are not record fields are ignored */
            if (strcmp (tag, "hostaddr:") != 0 && strcmp (tag, "port:") != 0 &&
                strcmp (tag, "username:") != 0 && strcmp (tag, "path:") != 0)
                continue;

            /* a record field is only meaningful after an "id:" line */
            if (!cur) {
                fprintf (stderr, "error: '%s' found before any id, line skipped.\n", tag);
                continue;
            }

            if (strcmp (tag, "hostaddr:") == 0) {
                set_field (&cur->hostaddr, val);
                continue;
            }

            if (strcmp (tag, "port:") == 0) {
                cur->port = (uint16_t)strtoul (val, NULL, 10);
                continue;
            }

            if (strcmp (tag, "username:") == 0) {
                set_field (&cur->uname, val);
                continue;
            }

            /* only "path:" remains: it completes the record */
            set_field (&cur->path, val);
            args[idx++] = cur;
            cur = NULL;

            if (idx == nargs) /* check idx, realloc */
                args = xrealloc_dp (args, &nargs);
        }
        fclose (fp);
        free (line);            /* buffer owned by getline */
        free_record (cur);      /* trailing record that never saw a path */

        printf ("\n keypath  : %s\n logfreq  : %d\n hashfreq : %d\n",
                keypath, logfreq, hashfreq);

        for (i = 0; i < idx; i++)
            printf ("\n id       : %s\n hostaddr : %s\n port     : %hu\n"
                    " username : %s\n path     : %s\n", args[i]->id, args[i]->hostaddr,
                    args[i]->port, args[i]->uname, args[i]->path);

        for (i = 0; i < idx; i++)
            free_record (args[i]);
        free (args);

        /* owned_keypath is left allocated on purpose: the global keypath
         * still points at it until the program exits. */
        return 0;
    }
    
    /* Allocate a zero-filled array of n elements of s bytes each with calloc.
     * Never returns NULL: when calloc fails a message is written to stderr
     * and the program exits with EXIT_FAILURE. */
    void *xcalloc (size_t n, size_t s)
    {
        void *block = calloc (n, s);

        if (block != NULL)
            return block;

        fprintf (stderr, "xcalloc() error: virtual memory exhausted.\n");
        exit (EXIT_FAILURE);
    }
    
    /* Double the capacity of an array of pointers.
     *
     * ptr : block currently holding *n pointers, obtained from
     *       malloc/calloc/realloc
     * n   : on entry the current element count (must be non-zero);
     *       on return twice that count
     *
     * returns the reallocated block, whose newly added upper half is filled
     * with zero bytes. A zero count, a byte size that would overflow size_t,
     * or a realloc failure writes a message to stderr and exits with
     * EXIT_FAILURE.
     */
    void *xrealloc_dp (void *ptr, size_t *n)
    {
        void **p;
        size_t old = *n;

        /* 2 * old * sizeof *p must fit in size_t, and doubling zero would
         * hand realloc a size of 0 and never grow the array */
        if (old == 0 || old > SIZE_MAX / (2 * sizeof *p)) {
            fprintf (stderr, "xrealloc_dp() error: invalid element count.\n");
            exit (EXIT_FAILURE);
        }

        p = realloc (ptr, 2 * old * sizeof *p);
        if (!p) {
            fprintf (stderr, "xrealloc_dp() error: virtual memory exhausted.\n");
            exit (EXIT_FAILURE);
        }
        memset (p + old, 0, old * sizeof *p); /* set new pointers NULL */
        *n = 2 * old;

        return p;
    }
    

    The xcalloc and xrealloc_dp functions are just functions that do error checking for calloc and realloc (for double-pointer) to keep the main body and logic of the code clear. I ran the code on the following test input and received the following output:

    Input Test File (with blank-lines)

    $ cat ../dat/idhostaddrport.txt
    keypath: /home/username/.ssh/id_rsaNOPASSWORD
    logfreq: 2
    hashfreq: 250
    
    id: test1
    hostaddr: XXX.XXX.XXX.XXX
    port: 221
    username: hpc1
    path: /home/hpc1/
    
    id: test2
    hostaddr: XXX.XXX.XXX.XXX
    port: 222
    username: hpc2
    path: /home/hpc2/
    
    id: test3
    hostaddr: XXX.XXX.XXX.XXX
    port: 223
    username: hpc3
    path: /home/hpc3/
    
    id: test4
    hostaddr: XXX.XXX.XXX.XXX
    port: 3844
    username: uname
    path: /home/uname/hpc/
    

    Output

    $ ./bin/getline_sscanf_dbg ../dat/idhostaddrport.txt
    
     keypath  : /home/username/.ssh/id_rsaNOPASSWORD
     logfreq  : 2
     hashfreq : 250
    
     id       : test1
     hostaddr : XXX.XXX.XXX.XXX
     port     : 221
     username : hpc1
     path     : /home/hpc1/
    
     id       : test2
     hostaddr : XXX.XXX.XXX.XXX
     port     : 222
     username : hpc2
     path     : /home/hpc2/
    
     id       : test3
     hostaddr : XXX.XXX.XXX.XXX
     port     : 223
     username : hpc3
     path     : /home/hpc3/
    
     id       : test4
     hostaddr : XXX.XXX.XXX.XXX
     port     : 3844
     username : uname
     path     : /home/uname/hpc/
    

    One thing you should work toward is further validating your input: ensuring that if you read an id, you also read the remaining values, hostaddr through path, for that id. One simple variation that adds minimal checks would be the following change to the read loop:

        /* read each line in file */
        while ((read = getline (&line, &len, fp)) != -1) {
    
            if (read == 1) continue;    /* skip blank lines */
    
            char tag[TSIZE] = {0};
            char val[VSIZE] = {0};
    
            /* separate tag and value (val) */
            if (sscanf (line, "%s %[^\n]%*c", tag, val) != 2) {
                fprintf (stderr, "error: sscanf conversion failed.\n");
                break;
            }
    
            /* handle keypath, logfreq, hashfreq */
            if (strcmp (tag, "keypath:") == 0) {
                keypath = strdup (val);
                continue;
            }
    
            if (strcmp (tag, "logfreq:") == 0) {
                logfreq = (int)strtol (val, NULL, 10);
                continue;
            }
    
            if (strcmp (tag, "hashfreq:") == 0) {
                hashfreq = (int)strtol (val, NULL, 10);
                continue;
            }
    
            /* allocate space for args[idx] if "id:", then
            handle id, hostaddr, port, username, path */
            if (strcmp (tag, "id:") == 0) {
    
                args[idx] = calloc (1, sizeof **args);
                args[idx]->id = strdup (val);
                size_t tagseq = 0;
    
                while ((read = getline (&line, &len, fp)) != -1) {
    
                    if (read == 1) continue;    /* skip blank lines */
    
                    /* separate tag and value (val) */
                    if (sscanf (line, "%s %[^\n]%*c", tag, val) != 2) {
                        fprintf (stderr, "error: sscanf conversion failed.\n");
                        break;
                    }
    
                    if (strcmp (tag, "hostaddr:") == 0) {
                        if (tagseq != 0) { 
                            fprintf (stderr, "error: tagseq failed for hostaddr.\n");
                            exit (EXIT_FAILURE);
                        }
                        args[idx]->hostaddr = strdup (val);
                        tagseq++;
                        continue;
                    }
    
                    if (strcmp (tag, "port:") == 0) {
                        if (tagseq != 1) { 
                            fprintf (stderr, "error: tagseq failed for port.\n");
                            exit (EXIT_FAILURE);
                        }
                        args[idx]->port = (uint16_t)strtoul (val, NULL, 10);
                        tagseq++;
                        continue;
                    }
    
                    if (strcmp (tag, "username:") == 0) {
                        if (tagseq != 2) { 
                            fprintf (stderr, "error: tagseq failed for username.\n");
                            exit (EXIT_FAILURE);
                        }
                        args[idx]->uname = strdup (val);
                        tagseq++;
                        continue;
                    }
    
                    /* increment idx on path */
                    if (strcmp (tag, "path:") == 0) {
                        if (tagseq != 3) { 
                            fprintf (stderr, "error: tagseq failed for path.\n");
                            exit (EXIT_FAILURE);
                        }
                        args[idx++]->path = strdup (val);
                        break;
                    }
                }
            }
    
            if (idx == nargs) /* check idx, realloc */
                args = xrealloc_dp (args, &nargs);
        }
        fclose (fp);
    

    Take a look at both and let me know if you have any additional questions.