Search code examples
cppm

Read ppm header information and output to console and output file in C


Beginning to learn C; I'm trying to read and process characters as I come to them while discarding whitespace characters. I need to also determine if it's a comment '#' or the first character of the next input value. The goal is to obtain and write the magic number, width, height, and maxval to a new file.

/*
 * Asker's attempt: opens the input file named by argv[1] and the output
 * file named by argv[2], then scans the input one character at a time.
 * Returns -1 if either file cannot be opened, 0 otherwise.
 *
 * NOTE(review): as written, nothing is ever written to `output`, neither
 * stream is closed, and the scan loop cannot get past the first whitespace
 * character (see the comments inside the loop).
 */
int main (int argc, char **argv)
{

char buffer[100];
int num_chars = 0;
FILE *input;
/* NOTE(review): argv[1] and argv[2] are used without checking argc. */
input = fopen(argv[1], "rb");

FILE *output;
output = fopen(argv[2], "w");

if (input == NULL){
    printf("ERROR: Input file needed!");
    return -1;
}

if (output == NULL){
    printf("ERROR: Output file needed!");
    return -1;
}

/* Read one character per iteration until end of file. */
for (int i = fgetc(input); i != EOF; i = fgetc(input)) {

    if (isspace(i)){
        /* Prints the whitespace character itself, not the magic number. */
        printf("The magic number is: %c\n", i);
        /*
         * Pushing the whitespace character back means the next fgetc()
         * in the loop header returns the same character again, so this
         * branch repeats forever: this is the infinite loop.
         */
        ungetc(i, input);
    } else if (i == '#') {
        /*
         * j starts out as '#', which is not whitespace in the default
         * "C" locale, so the loop condition is false on entry and the
         * body below never runs.
         */
        for (int j = i; isspace(j); j = fgetc(input)){
            /* NOTE(review): num_chars is never checked against sizeof buffer. */
            buffer[num_chars++] = j;
            printf("Comment found: %c\n", j);
            ungetc(i, input);
        }
    }
    /* Any other character (digits, 'P', ...) is read and discarded. */


}


return 0;
}

Edit: I used the suggested isspace function, but I have created an infinite loop and cannot see how. I apologize, but I'm not sure what sort of questions I am supposed to be asking.


Solution

  • The Netpbm formats are a bit odd, in that many programmers initially read them incorrectly.

    Simply put, the "magic number" (P1 to P7) must be at the beginning of the file, followed by the header fields, followed by a single whitespace character, followed by the data. The trick is that each header field may be preceded by whitespace and/or comment, and the header is followed by a single whitespace character.

    The P7 format, Portable Arbitrary Map file, has named header fields, but it is a rarely supported format anyway, so I'll limit this to the common P1 to P6 formats only. (To support its header fields, you only need another helper function, though.)

    You need four helper functions:

    1. A function to convert decimal digit to its numeric value.

      /*
       * Convert a decimal digit character to its numeric value.
       *
       * c: character code, as returned by getc().
       *
       * Returns 0..9 when c is one of '0'..'9', and -1 for anything else
       * (including EOF).
       */
      static int  decimal_digit(const int c)
      {
          /* The index of each character in this table is its numeric value. */
          static const char  digits[] = "0123456789";
          int                value;

          /* Scan only the ten digits, never the terminating NUL. */
          for (value = 0; value < 10; value++) {
              if (c == digits[value])
                  return value;
          }

          return -1;
      }
      

      Often, you'll see this shortened to (c - '0') (or ((c >= '0' && c <= '9') ? (c - '0') : -1) if you want an expression equivalent to the function above). That works because the C standard requires the decimal digits '0' through '9' to be consecutive code points in the execution character set (this holds for EBCDIC as well as for ASCII), so the shorter form is equally portable; the switch above simply spells the mapping out explicitly.
       

    2. A function to read the magic number.

      /*
       * Read the two-character NetPBM magic number ("P1" .. "P6") from the
       * current position of the stream.
       *
       * Returns the format number 1..6 on success:
       *   1 = ASCII PBM,  2 = ASCII PGM,  3 = ASCII PPM,
       *   4 = binary PBM, 5 = binary PGM, 6 = binary PPM.
       * Returns -1 if the stream does not start with a recognized magic
       * number (P7, the Portable Arbitrary Map, is deliberately rejected),
       * and -2 if the stream is NULL or already in an error state.
       *
       * The characters examined are consumed, even on failure.
       */
      int  pnm_magic(FILE *in)
      {
          int  first, second;

          if (in == NULL || ferror(in))
              return -2;  /* Invalid file handle. */

          first = getc(in);
          if (first != 'P')
              return -1;  /* Not a NetPBM file. */

          /* C guarantees '0'..'9' are consecutive, so '1'..'6' map to 1..6. */
          second = getc(in);
          if (second >= '1' && second <= '6')
              return second - '0';

          return -1;  /* Unknown format. */
      }
      

      Using a helper function to parse the magic number is not absolutely necessary, of course, but using one definitely makes your code easier to read, verify, and maintain. So using one is a good thing.
       

    3. A function to read the whitespace character at the end of the header.

      /*
       * Consume the single whitespace character that terminates a NetPBM
       * header.
       *
       * Returns 0 if the next character was whitespace (tab, newline,
       * vertical tab, form feed, carriage return or space) and has been
       * consumed. Returns -1 if the stream is NULL, is in an error state,
       * is at end of input, or the next character is not whitespace; in
       * the last case the character is pushed back so it is not lost.
       */
      int  pnm_endheader(FILE *in)
      {
          int  ch;

          if (in == NULL || ferror(in))
              return -1;  /* Invalid file handle. */

          ch = getc(in);
          switch (ch) {
          case '\t': case '\n': case '\v':
          case '\f': case '\r': case ' ':
              return 0;   /* Proper end of header. */

          case EOF:
              return -1;  /* Nothing to push back. */

          default:
              /* Not whitespace: leave the offending character unread. */
              ungetc(ch, in);
              return -1;
          }
      }
      

      Note that this function returns 0 if successful, nonzero if an error occurs.
       

    4. A function to parse a header field value, a nonnegative integer.

      Note that this function skips leading whitespace and comments, but leaves the character that terminated the value in the stream (via ungetc()).

      /*
       * Parse one NetPBM header field: a nonnegative decimal integer,
       * optionally preceded by whitespace and '#' comments.
       *
       * The character that terminates the value (whitespace or '#') is
       * pushed back onto the stream, so the caller can still see the single
       * whitespace character that ends the header.
       *
       * Returns the parsed value (>= 0) on success, or
       *   -1 if the stream is NULL or in an error state,
       *   -2 if the field does not start with a decimal digit (this includes
       *      end of input) or contains a non-digit character,
       *   -3 if the value is too large to be returned as an int.
       */
      int  pnm_value(FILE *in)
      {
          /* UINT_MAX / 2, which equals INT_MAX on the usual two's-complement
             implementations; used so no extra header is required. */
          const unsigned int  limit = (unsigned int)-1 / 2;
          unsigned int        val;
          int                 c, digit;

          if (!in || ferror(in))
              return -1;  /* Invalid file handle. */

          /* Skip leading ASCII whitespace and comments. */
          c = getc(in);
          while (c == '\t' || c == '\n' || c == '\v' ||
                 c == '\f' || c == '\r' || c == ' ' || c == '#') {
              if (c == '#') {
                  /* Skip the rest of the comment; the line terminator (or
                     EOF) is then handled by the outer loop. */
                  while (c != EOF && c != '\n' && c != '\r')
                      c = getc(in);
              } else {
                  c = getc(in);
              }
          }

          /* Parse initial decimal digit of value. The result is tested in a
             signed variable: storing it straight into an unsigned one would
             make the "< 0" test always false. */
          digit = decimal_digit(c);
          if (digit < 0)
              return -2; /* Invalid input. */
          val = (unsigned int)digit;

          while (1) {
              c = getc(in);

              /* Delimiter? End of input? */
              if (c == '\t' || c == '\n' || c == '\v' ||
                  c == '\f' || c == '\r' || c == ' ' || c == '#') {
                  /* Do not consume the character following the value. */
                  ungetc(c, in);
                  return (int)val;
              }
              if (c == EOF)
                  return (int)val;

              /* Is it a decimal digit? */
              digit = decimal_digit(c);
              if (digit < 0)
                  return -2; /* Invalid input. */

              /* Refuse, before multiplying, any value that would exceed the
                 limit; otherwise it could come back as a negative number
                 and be mistaken for an error code. */
              if (val > (limit - (unsigned int)digit) / 10)
                  return -3; /* Overflow. */
              val = (val * 10) + (unsigned int)digit;
          }
      }
      

    Remember:

    • P1 and P4 formats have two header fields: width and height, in that order.

    • P2, P3, P5, and P6 formats have three header fields: width, height, and maxval.

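    • You can use fscanf(handle, "%u", &value) to read each pixel from P2 format files, assuming unsigned int value;. It will return 1 if successful, and value will be from 0 to maxval, inclusive. In P1 files the 0 and 1 digits are not required to be separated by whitespace, so use fscanf(handle, "%1u", &value) there instead; value will then be 0 or 1.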

    • You can use fscanf(handle, "%u %u %u", &red, &green, &blue) to read each pixel from P3 format files, assuming unsigned int red, green, blue;. It will return 3 if successful. Then, each component will be from 0 to maxval, inclusive.

    • P4 format is the nastiest to read. It is best done one row of pixels at once. Each row is packed eight pixels per byte and padded to a whole number of bytes, so read it using fread(buf, (width + 7) / 8, 1, handle), with unsigned char buf[(width + 7) / 8]; or a similar-sized dynamically allocated array. The most significant bit of each byte is the leftmost pixel, so pixel x is !!(buf[x/8] & (0x80 >> (x & 7))) (0 being white, 1 being black; with x from 0 to width-1). (The !! is a double-not, or not-not operator: It yields 0 if the argument is 0, and 1 otherwise.)

    • For P5 format, if maxval >= 256, then each pixel consists of two bytes. You can use

      /*
       * Read one gray sample from binary PGM (P5) pixel data and scale it
       * by maxval.
       *
       * in:     stream positioned at the sample to read.
       * maxval: maximum sample value from the header. Values 1..255 mean
       *         one byte per sample; 256..65535 mean two bytes per sample,
       *         most significant byte first.
       *
       * Returns sample / maxval (0.0f when the sample is 0, 1.0f when it
       * equals maxval), -1.0f if a byte could not be read, or -2.0f if
       * maxval is outside 1..65535 (the stream is not touched then).
       */
      static float p5_gray(FILE *in, int maxval)
      {
          if (maxval >= 256 && maxval < 65536) {
              int  hi, lo;

              /* Check each byte separately: a failed first read must not be
                 combined with a successfully read second byte. */
              hi = fgetc(in);
              if (hi == EOF)
                  return -1.0f;
              lo = fgetc(in);
              if (lo == EOF)
                  return -1.0f;
              return (float)(hi*256 + lo) / (float)maxval;
          } else
          if (maxval >= 1 && maxval < 256) {
              int  val;
              val = fgetc(in);
              if (val == EOF)
                  return -1.0f;
              return (float)val / (float)maxval;
          } else
              return -2.0f;
      }
      

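      to read each pixel from P5 format. The function returns 0.0f for black and 1.0f for white (in PGM a sample of 0 is black and maxval is white), or a negative value if the sample could not be read or maxval is out of range.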

    • For P6 format, if maxval >= 256, then each pixel is 6 bytes; otherwise each pixel is three bytes. You can use e.g.

      /*
       * Read one RGB pixel from binary PPM (P6) pixel data and scale each
       * component by maxval.
       *
       * in:     stream positioned at the pixel to read.
       * maxval: maximum sample value from the header. Values 1..255 mean
       *         one byte per component (3 bytes per pixel); 256..65535 mean
       *         two bytes per component, most significant byte first
       *         (6 bytes per pixel).
       * red, green, blue: where to store component / maxval; any of them
       *         may be NULL if that component is not wanted.
       *
       * Returns 0 on success, -1 if a whole pixel could not be read, or -2
       * if maxval is outside 1..65535 (the stream is not touched then).
       */
      static int p6_rgb(FILE *in, int maxval, float *red, float *green, float *blue)
      {
          const float    max = (float)maxval;
          unsigned char  buf[6];

          if (maxval >= 256 && maxval < 65536) {
              if (fread(buf, 6, 1, in) != 1)
                  return -1; /* Error! */
              /* Byte pairs: red = buf[0..1], green = buf[2..3], blue = buf[4..5]. */
              if (red)
                  *red = (float)(buf[0]*256 + buf[1]) / max;
              if (green)
                  *green = (float)(buf[2]*256 + buf[3]) / max;
              if (blue)
                  *blue = (float)(buf[4]*256 + buf[5]) / max;
              return 0;
          } else
          if (maxval >= 1 && maxval < 256) {
              if (fread(buf, 3, 1, in) != 1)
                  return -1; /* Error! */
              if (red)
                  *red = (float)buf[0] / max;
              if (green)
                  *green = (float)buf[1] / max;
              if (blue)
                  *blue = (float)buf[2] / max;
              return 0;
          } else
              return -2; /* Invalid maxval */
      }
      

      to read each pixel from a P6 format file.

    So, if in is an open file handle (or say stdin), and you have int format, width, height, maxval;, you can do

    format = pnm_magic(in);
    if (format < 1 || format > 6) {
        /* Unrecognized format; fail! */
    }
    
    width = pnm_value(in);
    if (width <= 0) {
        /* Invalid width; fail! */
    }
    
    height = pnm_value(in);
    if (height <= 0) {
        /* Invalid height; fail! */
    }
    
    if (format == 2 || format == 3 || format == 5 || format == 6) {
        maxval = pnm_value(in);
        if (maxval < 1 || maxval > 65535) {
            /* Invalid maxval; fail! */
        }
    }
    
    if (pnm_endheader(in)) {
        /* Bad end of header; fail! */
    }
    

    to parse the header, leaving the file position at the beginning of the pixel data.