Search code examples
c csv scanf structure strtok

How to read comma-separated csv file with `sscanf()`


I'm attempting to print an array of structures read from a CSV file exported from Excel. However, only the students' IDs are printed correctly; the other fields are printed as confusing garbage characters. Can you please tell me what is wrong with this code?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One record of the CSV file: student ID, full name and score. */
typedef struct student {
    char ID[8];     /* ID text: room for 7 characters plus the terminating '\0' */
    char name[32];  /* name text: room for 31 characters plus the terminating '\0' */
    int score;      /* numeric score */
} student;

/*
 * Opens students.csv, tries to parse each line into an element of
 * student_list, then prints every stored element.
 * Exits with status 1 if the file cannot be opened, otherwise returns 0.
 *
 * NOTE(review): the NOTE comments below mark the lines responsible for
 * the garbage output described in the question.
 */
int main(int argc, char *argv[]) {
    student student_list[100];
    FILE *source = fopen("students.csv", "r");
    if (source == NULL) {
        perror("Unable to open the source file.");
        exit(1);
    }

    char buffer[1024];
    /* Read the first line before entering the loop; the result of fgets()
     * is not checked. */
    fgets(buffer, sizeof(buffer), source);
    int num_student = 0;
    /* NOTE(review): the loop stops on the EOF flag instead of on fgets()
     * failing.  A final line without a trailing newline sets EOF while it
     * is being read, so that line is never parsed.  There is also no check
     * that num_student stays below the 100-element capacity. */
    while (!feof(source)) {
        student *one_student = student_list + num_student;
        /* NOTE(review): the format contains no ',' between conversions, so
         * after the ID is read the next %[^,] stops at the comma without
         * matching anything and the scan ends.  The widths 8 and 32 leave
         * no room for the terminating '\0', and %3[^,] would store
         * characters into the int score rather than convert a number.
         * The return value of sscanf() is not checked. */
        sscanf(buffer, "%8[^,] %32[^,] %3[^,]",
               &one_student->ID, &one_student->name, &one_student->score);
        /* Fetch the next line for the following iteration. */
        fgets(buffer, sizeof(buffer), source);
        num_student++;
    }
    /* Print every element that the loop counted, whether or not its
     * fields were actually filled in. */
    for (int i = 0; i < num_student; i++) {
         printf("ID: %s  name: %-9s score: %-3d\n",
                student_list[i].ID, student_list[i].name, student_list[i].score);
    }
    fclose(source);
    return 0;
}

This is a sample input file students.csv:

B213350,John Adam Smith,80
B191835,Mary Elizabeth Smith,71
B201304,Yamazaki Fuyumi,95
B201832,Liam,57
B201834,Alfonso Hernández,65

Solution

  • There are multiple problems:

    • you should not use feof(). Read Why is “while ( !feof (file) )” always wrong?
      Use this loop instead:

        while (fgets(buffer, sizeof buffer, source)) {
            // handle the line
        }
      
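    • the sscanf() format string is incorrect: the field widths are too large (each width must be one less than the array size, to leave room for the null terminator), the , separators are missing, and the score must be converted with %d instead of %3[^,]. It should be " %7[^,\n], %31[^,\n], %d", the arrays should be passed without &, and you should check that the return value is 3, the number of successful conversions expected.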

    • you should stop when the student array is full.

    Here is a modified version:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /* A single student record parsed from one line of the CSV file. */
    struct student {
        char ID[8];     /* up to 7 characters plus the terminating '\0' */
        char name[32];  /* up to 31 characters plus the terminating '\0' */
        int score;      /* numeric score */
    };
    typedef struct student student;
    
    /*
     * Reads "students.csv" from the current directory, parses each line of
     * the form  ID,name,score  into student_list and prints the records
     * that were parsed successfully.
     *
     * Lines that do not yield all three fields are reported on stderr and
     * skipped.  Reading stops when the array is full or the input ends.
     *
     * Returns 0 on success, 1 if the file cannot be opened or a read
     * error occurs.
     */
    int main(int argc, char *argv[]) {
        (void)argc;     /* command-line arguments are not used */
        (void)argv;

        student student_list[100];
        /* Derive the capacity from the array so the two cannot drift apart. */
        const int capacity = (int)(sizeof student_list / sizeof student_list[0]);

        FILE *source = fopen("students.csv", "r");
        if (source == NULL) {
            fprintf(stderr, "Cannot open file students.csv: %s\n", strerror(errno));
            return 1;
        }

        char buffer[1024];
        int num_student = 0;
        int status = 0;
        while (num_student < capacity && fgets(buffer, sizeof(buffer), source)) {
            student *one_student = &student_list[num_student];
            /* Widths 7 and 31 leave room for the '\0' in ID[8] and name[32];
             * the leading spaces skip whitespace before each field. */
            if (sscanf(buffer, " %7[^,\n], %31[^,\n], %d",
                       one_student->ID, one_student->name,
                       &one_student->score) == 3) {
                num_student++;
            } else {
                /* Drop the newline (if any) so the diagnostic always ends
                 * with exactly one, and keep it out of the data on stdout. */
                buffer[strcspn(buffer, "\n")] = '\0';
                fprintf(stderr, "invalid CSV line: %s\n", buffer);
            }
        }
        /* fgets() returns NULL both at end of file and on error; tell them apart. */
        if (ferror(source)) {
            fprintf(stderr, "Error reading file students.csv\n");
            status = 1;
        }
        for (int i = 0; i < num_student; i++) {
             printf("ID: %-9s  name: %-32s score: %-3d\n",
                    student_list[i].ID, student_list[i].name,
                    student_list[i].score);
        }
        fclose(source);
        return status;
    }
    

    Note that this approach to parsing CSV files cannot handle empty fields, because a %[...] conversion fails unless it matches at least one character. Parsing the line with strtok() would not work either, because consecutive commas would be treated as a single separator. You need a different approach using strcspn() or strchr().