Search code examples
c struct scanf eof conversion-specifier

Trouble loading a CSV into a struct where the last member is an array of char


I want to read a CSV file and load it into an array of structs. I used code that I found on YouTube and GitHub (https://github.com/portfoliocourses/c-example-code/blob/main/csv_to_struct_array.c). Now I want to change every member of the struct into an array of characters (a string). This works if I change the members one at a time (at least for type and age), but as soon as I change the member average into an array of characters, I get the error message "File format incorrect." I suspect that this is because of a problem with end-of-file (EOF) detection in the do ... while (!feof(file)); loop. How can I solve the problem?

Here is the original code:


/*******************************************************************************
*
* Program: Read CSV File Data To An Array Of Structs
* 
* Description: Example of reading CSV file data into an array of structs in C.
*
* YouTube Lesson: https://www.youtube.com/watch?v=rbVt5v8NNe8 
*
* Author: Kevin Browne @ https://portfoliocourses.com
*
*******************************************************************************/

#include <stdio.h>

// A struct for representing student data in a file formatted like this:
//
// U,Virat Kohli,23,95.6
// U,Serena Williams,22,83.2
// G,Wayne Gretzky,19,84.2
//
// with a "student type" (e.g. undergraduate, graduate) single character, 
// followed by the student's name, age and then average.
//
typedef struct 
{
  // one CSV record; members are declared in the same order as the columns
  char type;       // single-character student type code, e.g. 'U' or 'G'
  char name[50];   // NUL-terminated name, at most 49 characters
  int age;         // age column parsed as an integer
  double average;  // average column parsed as a double
} Student;

// Entry point: loads Student records from the CSV file file.txt into a
// fixed-size array and prints them.
//
// Returns 0 after printing the records, or 1 when the file cannot be opened,
// a record does not match the expected format, or a read error is reported.
int main(void)
{
  // file pointer variable for accessing the file
  FILE *file;
  
  // attempt to open file.txt in read mode to read the file contents
  file = fopen("file.txt", "r"); 
  
  // if the file failed to open, exit with an error message and status
  if (file == NULL)
  {
    printf("Error opening file.\n");
    return 1;
  }
  
  // array of structs for storing the Student data from the file
  // NOTE(review): the read loop below never compares records against the
  // array size of 100, so a file with more records would write past the end.
  Student students[100];
  
  // read holds the return value of the latest fscanf() call, i.e. how many
  // fields of the current record were converted and stored
  int read = 0;
  
  // records will keep track of the number of Student records read from the file
  int records = 0;

  // read all records from the file and store them into the students array
  do 
  {
    // Read one line/record with the format: type,name,age,average
    //
    //   %c       stores exactly one character and does not skip white space
    //   %49[^,]  stores up to 49 characters that are NOT a comma, leaving
    //            room in the 50-character name member for the terminating
    //            NUL, so 49 is the longest name that can be read
    //   %d, %lf  parse the age and the average as numbers
    //   \n       a white-space directive: it consumes any run of white space,
    //            which removes the line break after the record (and is what
    //            lets end-of-file be detected right after the last record)
    //
    // fscanf() returns the number of values it stored, which is 4 for a
    // complete record.
    read = fscanf(file,
                  "%c,%49[^,],%d,%lf\n",
                  &students[records].type, 
                  students[records].name, 
                  &students[records].age, 
                  &students[records].average); 
    
    // a return value of 4 means a complete record was stored in the current
    // slot, so advance to the next one
    if (read == 4) records++;
    
    // A short read that is not accompanied by end-of-file is treated as a
    // malformed file.
    // NOTE(review): a truncated final record that runs into end-of-file is
    // not reported here; it is silently dropped.
    // NOTE(review): this return and the one below leave file open; fclose()
    // is only reached on the success path.
    if (read != 4 && !feof(file))
    {
      printf("File format incorrect.\n");
      return 1;
    }
    
    // if there was an error reading from the file exit with an error message 
    // and status
    if (ferror(file))
    {
      printf("Error reading file.\n");
      return 1;
    }

  } while (!feof(file));

  // close the file as we are done working with it
  fclose(file);
  
  // print out the number of records read
  printf("\n%d records read.\n\n", records);
  
  // print each stored record on its own line, with the average shown to two
  // decimal places
  for (int i = 0; i < records; i++)
    printf("%c %s %d %.2f\n", 
           students[i].type, 
           students[i].name,
           students[i].age,
           students[i].average);
  printf("\n");

  return 0;
}


Here is the modified code:

#include <stdio.h>

typedef struct 
{
  // In this variant every CSV column is stored as text. The member
  // declarations of the original version are kept, commented out, for
  // comparison.
  //char type; \\original commented code
  //char name[50]; \\original commented code
  //int age; \\original commented code
  //double average; \\original commented code
  char type[50];     // type column as a string, at most 49 characters
  char name[50];     // name column as a string, at most 49 characters
  char age[50];      // age column kept as text, not converted to a number
  char average[50];  // average column kept as text, not converted to a number
} Student;

// Entry point of the all-strings variant: tries to load each CSV record of
// file.txt into four string members and print them.
//
// Returns 0 after printing, or 1 when the file cannot be opened, a record
// does not match the format, or a read error is reported.
int main(void)
{
  FILE *file;
  file = fopen("file.txt", "r"); 
  if (file == NULL)
  {
    printf("Error opening file.\n");
    return 1;
  }
  // NOTE(review): records is never checked against the array size of 100.
  Student students[100];
  int read = 0;     // number of fields stored by the latest fscanf() call
  int records = 0;  // number of complete records stored so far

  do 
  {
    // Every field is read with %49[^,], i.e. up to 49 characters that are
    // not a comma.
    //
    // NOTE(review): this is the cause of the reported failure. The fourth
    // %49[^,] does not stop at the end of the line; it only stops at a comma
    // (or after 49 characters). With several records in the file it therefore
    // reads across the line break and swallows the first field of the next
    // record. The following call then starts directly on a comma, the first
    // %49[^,] cannot match a single character, fscanf() returns 0 before
    // end-of-file, and the format-error branch below is taken.
    read = fscanf(file,
                  //"%c,%49[^,],%d,%lf\n" \\original commented code
                  "%49[^,],%49[^,],%49[^,],%49[^,]\n",
                  students[records].type, 
                  students[records].name, 
                  students[records].age, 
                  students[records].average); 
                  //students[records].average); 
    
    // four stored fields make a complete record
    if (read == 4) records++;

    // a short read without end-of-file is reported as a malformed file
    // NOTE(review): this return and the one below leave file open.
    if (read != 4 && !feof(file))
    {
      printf("File format incorrect.\n");
      return 1;
    }
    
    if (ferror(file))
    {
      printf("Error reading file.\n");
      return 1;
    }

  } while (!feof(file));

  fclose(file);
  
  printf("\n%d records read.\n\n", records);
  
  // NOTE(review): the last conversion is %.s, not %s. A precision written as
  // a lone period is taken as zero, so no characters of average are printed.
  for (int i = 0; i < records; i++)
    //printf("%c %s %d %.2f\n", \\original commented code
    printf("%s %s %s %.s\n", 
           students[i].type, 
           students[i].name,
           students[i].age,
           students[i].average);
  printf("\n");

  return 0;
}

EDIT 1: I changed the format string to " %49[^,],%49[^,],%49[^,],%49[^\n]"

and got this output:


3 records read.

U Virat Kohli 23
U Serena Williams 22
G Wayne Gretzky 19

This is an improvement in the sense that the file can now be parsed, but the last column (the item after the last comma on each line) is missing from the output, which should read:

3 records read.

U Virat Kohli 23 95.60
U Serena Williams 22 83.20
G Wayne Gretzky 19 84.20

Here is the source code of the modified version

#include <stdio.h>

typedef struct 
{
  // Every CSV column is stored as text in this version. The member
  // declarations of the original version are kept, commented out, for
  // comparison.
  //char type; \\original commented code
  //char name[50]; \\original commented code
  //int age; \\original commented code
  //double average; \\original commented code
  char type[50];     // type column as a string, at most 49 characters
  char name[50];     // name column as a string, at most 49 characters
  char age[50];      // age column kept as text, not converted to a number
  char average[50];  // average column kept as text, not converted to a number
} Student;

// Entry point of the corrected all-strings variant: loads each CSV record of
// file.txt into four string members and prints them.
//
// Returns 0 after printing, or 1 when the file cannot be opened, a record
// does not match the format, or a read error is reported.
int main(void)
{
  FILE *file;
  file = fopen("file.txt", "r"); 
  if (file == NULL)
  {
    printf("Error opening file.\n");
    return 1;
  }
  // NOTE(review): records is never checked against the array size of 100.
  Student students[100];
  int read = 0;     // number of fields stored by the latest fscanf() call
  int records = 0;  // number of complete records stored so far

  do 
  {
    // Format used here:
    //   leading space  skips any white space, including the line break left
    //                  in the stream after the previous record
    //   %49[^,]        (three times) up to 49 characters that are not a comma
    //   %49[^\n]       up to 49 characters that are not a newline, so the
    //                  last field ends at the end of the line
    // The line break itself is not consumed by this call; the leading space
    // of the next call removes it.
    read = fscanf(file,
                  //"%c,%49[^,],%d,%lf\n" \\original commented code
                  //"%49[^,],%49[^,],%49[^,],%49[^,\n]",
                  " %49[^,],%49[^,],%49[^,],%49[^\n]",
                  students[records].type, 
                  students[records].name, 
                  students[records].age, 
                  students[records].average); 
                  //students[records].average); 
    
    // four stored fields make a complete record
    if (read == 4) records++;

    // a short read without end-of-file is reported as a malformed file
    // NOTE(review): this return and the one below leave file open.
    if (read != 4 && !feof(file))
    {
      printf("File format incorrect.\n");
      return 1;
    }
    
    if (ferror(file))
    {
      printf("Error reading file.\n");
      return 1;
    }

  } while (!feof(file));

  fclose(file);
  
  printf("\n%d records read.\n\n", records);
  
  // NOTE(review): the last conversion is %.s, not %s. A precision written as
  // a lone period is taken as zero, so no characters of average are printed;
  // this is why the last column is absent from the output.
  for (int i = 0; i < records; i++)
    //printf("%c %s %d %.2f\n", \\original commented code
    printf("%s %s %s %.s\n", 
           students[i].type, 
           students[i].name,
           students[i].age,
           students[i].average);
  printf("\n");

  return 0;
}

with the source record file.txt (copied from the original code):

U,Virat Kohli,23,95.6
U,Serena Williams,22,83.2
G,Wayne Gretzky,19,84.2


Solution

  • Rewrite the format string like

    " %49[^,],%49[^,],%49[^,],%49[^\n]",
    

    Pay attention to the leading space in the format string: it makes fscanf skip any leading white space, including the newline character left in the stream after the previous record.

    I assume that whole records are not ended with a comma.

    Another approach is to declare a character array large enough to store one record from the file and to read each record with fgets instead of fscanf. You can then parse the record with either strtok or sscanf.

    Also in the call of printf

    printf("%s %s %s %.s\n", 
           students[i].type, 
           students[i].name,
           students[i].age,
           students[i].average);
    

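    there is an incorrect conversion specification, %.s: a precision written as a lone period is taken as zero, so no characters of the string are printed. Write just %s instead.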