Search code examples
cgdb

Why is "%79[^\n]\n" causing a segmentation fault?


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Reads spooky.csv with fscanf() and copies each piece of text read into
 * ufos.csv, disappearances.csv or others.csv, depending on whether it
 * contains "UFO", "Disappearance" or neither.
 *
 * NOTE(review): none of the four fopen() results is checked, so a failed
 * open hands a null FILE pointer to fscanf()/fprintf()/fclose() below.
 */
int main()
{
        char line[80];
        FILE *in = fopen("spooky.csv", "r");
        FILE *file1 = fopen("ufos.csv", "w");
        FILE *file2 = fopen("disappearances.csv", "w");
        FILE *file3 = fopen("others.csv", "w");
        /*
         * %79[^\n] stores at most 79 non-newline characters plus the
         * terminator in line; the trailing \n directive then skips the
         * newline and any white space that follows it. The loop ends as
         * soon as fscanf() fails to convert a string.
         */
        while (fscanf(in, "%79[^\n]\n", line) == 1) {
                /* "UFO" is tested first, so it wins when both keywords occur. */
                if (strstr(line, "UFO"))
                        fprintf(file1, "%s\n", line);
                else if (strstr(line, "Disappearance"))
                        fprintf(file2, "%s\n", line);
                else
                        fprintf(file3, "%s\n", line);
        }
        /* NOTE(review): the input stream `in` is never closed. */
        fclose(file1);
        fclose(file2);
        fclose(file3);
        return 0;
}

This code fails at runtime with a segmentation fault.

[Image: screenshot of the error message]

Does it matter on what hardware I'm compiling this program? I'm using a Fedora 38 (Workstation) with Intel Core i7 (7th Gen).


Solution

  • The problem has nothing to do with the fscanf format: as indicated in the error message, the argument s to the internal function __vfscanf_internal has the value 0x0 indicating a null pointer passed for the FILE * argument.

    You should check whether fopen failed to open each file and report the failure with a meaningful error message.

    Also note these remarks:

    • you should close in.
    • you could use fgets() instead of the fscanf() format.
    • the fscanf format will fail converting an initial empty line in spooky.csv, which should not occur if it is an actual CSV file.
    • the trailing \n in the conversion specification will cause the newline to be consumed along with any initial white space in the next line, which may not be intended. Use %79[^\n]%*[\n] to avoid this, yet consume subsequent empty lines.
    • the code will silently break long lines into chunks of 79 bytes. Modifying the code for arbitrarily long lines is non-trivial, but a larger buffer might be a good option.
    • you might want to special case the first line if it is expected to be a header line with the field names.

    Here is a modified version:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /*
     * Open `filename` in the given `mode`, terminating the program on failure.
     *
     * Returns the stream obtained from fopen(); never returns NULL.
     * If fopen() fails, prints "cannot open <filename>: <reason>" to stderr,
     * where <reason> is strerror(errno), and calls exit(1).
     */
    FILE *fopen_check(const char *filename, const char *mode) {
        FILE *stream = fopen(filename, mode);
        if (stream != NULL)
            return stream;

        /* Only reached when the open failed. */
        fprintf(stderr, "cannot open %s: %s\n", filename, strerror(errno));
        exit(1);
    }
    
    /*
     * Split spooky.csv into three files by keyword.
     *
     * Each line is copied to ufos.csv if it contains "UFO", otherwise to
     * disappearances.csv if it contains "Disappearance", otherwise to
     * others.csv. Empty lines are skipped, and lines longer than 79 bytes
     * are processed in chunks of 79 bytes.
     *
     * Returns 0 on success and 1 if reading the input or writing any of the
     * outputs failed. A file that cannot be opened terminates the program
     * inside fopen_check().
     */
    int main(void) {
        char line[80];
    
        FILE *in = fopen_check("spooky.csv", "r");
        FILE *file1 = fopen_check("ufos.csv", "w");
        FILE *file2 = fopen_check("disappearances.csv", "w");
        FILE *file3 = fopen_check("others.csv", "w");
    
        fscanf(in, "%*[\n]");  // skip initial empty lines
    
        // %79[^\n] reads one line (or a 79-byte chunk of it); %*[\n] then
        // discards the newline together with any empty lines that follow.
        while (fscanf(in, "%79[^\n]%*[\n]", line) == 1) {
            FILE *out = file3;
            if (strstr(line, "UFO"))
                out = file1;
            else if (strstr(line, "Disappearance"))
                out = file2;
            fprintf(out, "%s\n", line);
        }
    
        int status = 0;
    
        // The loop also stops on a read error, not only at end of file.
        if (ferror(in)) {
            fprintf(stderr, "error reading %s\n", "spooky.csv");
            status = 1;
        }
        fclose(in);
    
        FILE *outputs[] = { file1, file2, file3 };
        const char *names[] = { "ufos.csv", "disappearances.csv", "others.csv" };
        for (size_t i = 0; i < sizeof outputs / sizeof outputs[0]; i++) {
            // fclose() flushes buffered data, so a failure here means lost
            // output; ferror() catches failures from earlier writes.
            int failed = ferror(outputs[i]);
            if (fclose(outputs[i]) != 0)
                failed = 1;
            if (failed) {
                fprintf(stderr, "error writing %s\n", names[i]);
                status = 1;
            }
        }
        return status;
    }