Search code examples
Tags: c, csv, file-handling, strtok, strtol

strtol gives segfault when strtok returns NULL


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Return the num-th (1-based) token of a comma-separated line, or NULL when
 * the line has fewer than num tokens.
 *
 * The line is tokenized in place with strtok, so the buffer is modified and
 * the returned pointer refers into it. The first token is split on ',' only;
 * later tokens are split on ',' or '\n'. Because strtok treats a run of
 * adjacent delimiters as a single delimiter, empty fields are never reported
 * and the fields after them shift position.
 */
const char* getfield(char* line, int num) {
        const char* field = strtok(line, ",");

        while (field != NULL && *field != '\0')
        {
            num--;
            if (num == 0)
            {
                return field;
            }
            field = strtok(NULL, ",\n");
        }

        return NULL;
}

    /*
     * Read b.csv line by line and print the third comma-separated field of
     * each line as an integer.
     */
    int main()
    {
        char line[1024];
        const char* value;
        char *pstr;
        int num;
        FILE* stream = fopen("b.csv", "r");

        while (fgets(line, sizeof line, stream) != NULL)
        {
            // getfield tokenizes its argument in place (strtok), so give it a scratch copy
            char* tmp = strdup(line);

            // value is NULL when the line has fewer than three non-empty fields;
            // handing it to strtol unchecked is the crash this post asks about
            value = getfield(tmp, 3);
            num = strtol(value, &pstr, 10);
            printf("Field 3 would be %d\n", num);

            free(tmp);
        }
    }

/* b.csv

301,36,15
302,88,75

// my output

Field 3 would be 15
Field 3 would be 75

*/

The issue appears with the following b.csv:

301,36,15
 302,88,
 ,,,34

If the table is broken as above, "strtok" returns NULL, and so "strtol" segfaults. How can I resolve this?

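The main issue here is that if the 2nd field is missing, the 3rd is treated as the 2nd, and the program segfaults. For example, the 3rd row of b.csv is ",,,34", so a value is present after the empty fields, but the code behaves as if "34" were the 1st value and the 2nd and 3rd were NULL. (This happens because strtok treats a run of consecutive delimiters as a single delimiter, so it never reports empty fields.)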


Solution

  • Why can't you just check the value returned by getfield(tmp, 3) for NULL, and skip the call to strtol when it is NULL? Another way around it is to declare a static char* not_found = ""; in getfield and return that instead of NULL; then strtol will not segfault.

    UPDATE

    Since I found that strtok really is no help in this situation (it cannot report empty fields), I wrote code that does the same job with strchr:

        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>
    
        /*
         * Return a newly allocated copy of the num-th (1-based) comma-separated
         * field of line.
         *
         * A field ends at a comma, at a line terminator ('\r' or '\n'), or at the
         * end of the string, so the newline left by fgets is never part of the
         * result. Empty fields are counted, unlike with strtok. The input is only
         * read, never modified.
         *
         * If line is NULL, num is not positive, the line has fewer than num
         * fields, or the requested field is empty, the string "0" is returned.
         *
         * The result is always heap-allocated and must be released by the caller
         * with free(). NULL is returned only when the allocation fails.
         */
        char* getfield(char* line, int num) {
            const char* cursor = line;
            char* result;

            if (cursor != NULL && num > 0)
            {
                for (;;)
                {
                    /* Length of the current field, excluding its terminator. */
                    size_t fieldlen = strcspn(cursor, ",\r\n");

                    if (--num == 0)
                    {
                        if (fieldlen == 0)
                        {
                            break; /* requested field is empty: use the "0" fallback */
                        }
                        result = malloc(fieldlen + 1);
                        if (result == NULL)
                        {
                            return NULL;
                        }
                        memcpy(result, cursor, fieldlen);
                        result[fieldlen] = '\0';
                        return result;
                    }

                    /* Only a comma introduces another field; anything else ends the line. */
                    if (cursor[fieldlen] != ',')
                    {
                        break;
                    }
                    cursor += fieldlen + 1;
                }
            }

            /* Fallback for a missing or empty field. */
            result = malloc(2);
            if (result != NULL)
            {
                strcpy(result, "0");
            }
            return result;
        }
    
        /*
         * Read b.csv line by line and print field 3 of each line as an integer.
         *
         * Returns EXIT_FAILURE if the file cannot be opened or a field cannot be
         * obtained from getfield, EXIT_SUCCESS otherwise.
         */
        int main(void)
        {
            char line[1024];
            int status = EXIT_SUCCESS;
            FILE* stream = fopen("b.csv", "r");

            if (stream == NULL)
            {
                perror("b.csv");
                return EXIT_FAILURE;
            }

            while (fgets(line, sizeof line, stream) != NULL)
            {
                // getfield only reads the line and hands back its own heap copy,
                // so no scratch duplicate of the line is needed
                char* value = getfield(line, 3);
                int num;

                if (value == NULL)
                {
                    fputs("getfield: out of memory\n", stderr);
                    status = EXIT_FAILURE;
                    break;
                }

                num = (int)strtol(value, NULL, 10);
                free(value);
                printf("Field 3 would be %d\n", num);
            }

            fclose(stream);
            return status;
        }
    

    This worked on input file:

        10,,30
        10,
    

    The code returns the string "0" if the field is missing or empty (you can change that), and the result is dynamically allocated, so the caller must free it. I hope this helps; the lesson for me is: avoid C when parsing strings :D