Beginning to learn C; I'm trying to read and process characters as I come to them while discarding whitespace characters. I need to also determine if it's a comment '#' or the first character of the next input value. The goal is to obtain and write the magic number, width, height, and maxval to a new file.
/*
 * Copy the header tokens of a Netpbm file to a text file, one per line.
 *
 * argv[1] names the input image and argv[2] the output file.  The header
 * tokens are, in order: the magic number, the width, the height and (for
 * every format except the P1 and P4 bitmaps, which have none) the maxval.
 * Whitespace between tokens is discarded, and a '#' starts a comment that
 * runs to the end of the line.
 *
 * Returns 0 on success and -1 if either file cannot be opened or the
 * output cannot be flushed.  A header that ends early is not reported;
 * the output then simply holds fewer lines.
 */
int main (int argc, char **argv)
{
    char buffer[100];
    int num_chars;
    int fields = 0;
    int wanted = 4;   /* magic, width, height, maxval */
    int c;
    FILE *input;
    FILE *output;

    /* argv[1] and argv[2] only exist when enough arguments were given. */
    if (argc < 2) {
        printf("ERROR: Input file needed!");
        return -1;
    }
    if (argc < 3) {
        printf("ERROR: Output file needed!");
        return -1;
    }

    /* Open the input first so a bad input path never truncates the output. */
    input = fopen(argv[1], "rb");
    if (input == NULL) {
        printf("ERROR: Input file needed!");
        return -1;
    }
    output = fopen(argv[2], "w");
    if (output == NULL) {
        printf("ERROR: Output file needed!");
        fclose(input);
        return -1;
    }

    c = fgetc(input);
    while (c != EOF && fields < wanted) {
        if (isspace(c)) {
            /* Consume the whitespace; pushing it back would re-read it forever. */
            c = fgetc(input);
        } else if (c == '#') {
            /* Skip the comment up to, but not including, the line ending. */
            while (c != EOF && c != '\n' && c != '\r')
                c = fgetc(input);
        } else {
            /* Collect one token; characters beyond the buffer are dropped. */
            num_chars = 0;
            while (c != EOF && !isspace(c) && c != '#') {
                if (num_chars < (int)sizeof buffer - 1)
                    buffer[num_chars++] = (char)c;
                c = fgetc(input);
            }
            buffer[num_chars] = '\0';
            fprintf(output, "%s\n", buffer);
            fields++;

            /* Bitmap formats carry no maxval, so stop after the height. */
            if (fields == 1 && buffer[0] == 'P' &&
                (buffer[1] == '1' || buffer[1] == '4') && buffer[2] == '\0')
                wanted = 3;
        }
    }

    fclose(input);
    if (fclose(output) != 0)
        return -1;
    return 0;
}
Edit: I used the suggested isspace function, but I have created an infinite loop and cannot see how. I apologize, but I'm not sure what sort of questions I am supposed to be asking.
The Netpbm formats are a bit odd, in that many programmers initially read them incorrectly.
Simply put, the "magic number" (P1
to P7
) must be at the beginning of the file, followed by the header fields, followed by a single whitespace character, followed by the data. The trick is that each header field may be preceded by whitespace and/or comment, and the header is followed by a single whitespace character.
The P7
format, Portable Arbitrary Map file, has named header fields, but it is a rarely supported format anyway, so I'll limit this to the common P1
to P6
formats only. (To support its header fields, you only need another helper function, though.)
You need four helper functions:
A function to convert decimal digit to its numeric value.
/*
 * Return the numeric value (0 to 9) of the decimal digit character c,
 * or -1 if c is not one of the characters '0' through '9'.
 */
static int decimal_digit(const int c)
{
    /* Exactly ten entries, no terminator, so c == 0 can never match. */
    static const char digits[10] = {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
    };
    int value;

    for (value = 0; value < 10; value++) {
        if (c == digits[value])
            return value;
    }
    return -1;
}
Often, you'll see this shortened to (c - '0')
, (or ((c >= '0' && c <= '9') ? (c - '0') : -1)
if you want an expression equivalent to the function above). That shortcut is portable: the C standard guarantees that the characters '0' through '9' have consecutive, increasing values in every execution character set (this holds for EBCDIC as well as ASCII). The switch above merely spells the mapping out explicitly.
A function to read the magic number.
/*
 * Read the two-character magic number ("P1" to "P6") from the stream.
 *
 * Returns the format number 1 to 6 on success:
 *   1, 2, 3 are the ASCII PBM, PGM and PPM formats;
 *   4, 5, 6 are the binary PBM, PGM and PPM formats.
 * Returns -2 if the stream is NULL or has its error indicator set, and
 * -1 if the input does not start with 'P' followed by a digit from 1 to 6.
 * When the first character is not 'P', the second character is not read.
 * P7 (Portable Arbitrary Map) is intentionally reported as unknown.
 */
int pnm_magic(FILE *in)
{
    int kind;

    if (in == NULL || ferror(in))
        return -2;

    if (getc(in) != 'P')
        return -1;

    kind = getc(in);
    if (kind >= '1' && kind <= '6')
        return kind - '0';   /* digit characters are consecutive in C */

    return -1;
}
Using a helper function to parse the magic number is not absolutely necessary, of course, but using one definitely makes your code easier to read, verify, and maintain. So using one is a good thing.
A function to read the whitespace character at the end of the header.
/*
 * Consume the single whitespace character that terminates the header.
 *
 * Returns 0 if the next character was a tab, newline, vertical tab,
 * form feed, carriage return or space.  Returns -1 if the stream is NULL,
 * has its error indicator set, is at end of input, or the next character
 * is not whitespace; a non-whitespace character is pushed back so that it
 * is not consumed.
 */
int pnm_endheader(FILE *in)
{
    int next;

    if (in == NULL || ferror(in))
        return -1;

    next = getc(in);
    switch (next) {
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
    case ' ':
        return 0;
    case EOF:
        /* Nothing to push back. */
        return -1;
    default:
        ungetc(next, in);
        return -1;
    }
}
Note that this function returns 0 if successful, nonzero if an error occurs.
A function to parse a header field value, a nonnegative integer.
Note that this function skips leading whitespace and comments, but leaves the character that terminated the value in the stream (via ungetc()
).
/*
 * Parse one header field: a nonnegative decimal integer.
 *
 * Leading whitespace and '#' comments (which run to the end of the line)
 * are skipped.  The whitespace or '#' character that terminates the value
 * is pushed back onto the stream, so it is not consumed.
 *
 * Returns the value (>= 0) on success, or
 *   -1 if the stream is NULL or has its error indicator set,
 *   -2 if the field is missing or contains a non-digit character,
 *   -3 if the value is too large to be returned.
 */
int pnm_value(FILE *in)
{
    /* Largest value accepted.  This equals INT_MAX on the usual
       implementations, where unsigned int has exactly one more value bit
       than int, so the final conversion to int cannot change the value. */
    const unsigned int limit = (unsigned int)-1 / 2;
    unsigned int val;
    int c, digit;

    if (!in || ferror(in))
        return -1; /* Invalid file handle. */

    /* Skip leading ASCII whitespace and comments. */
    c = getc(in);
    while (c == '\t' || c == '\n' || c == '\v' ||
           c == '\f' || c == '\r' || c == ' ' || c == '#') {
        if (c == '#') {
            /* Skip the rest of the comment; the line ending itself is
               then handled as ordinary whitespace by the outer loop. */
            while (c != EOF && c != '\n' && c != '\r')
                c = getc(in);
        } else {
            c = getc(in);
        }
    }

    /* Parse the initial decimal digit.  The result is kept in a signed
       variable: stored in an unsigned one, -1 could never test negative. */
    digit = decimal_digit(c);
    if (digit < 0)
        return -2; /* Invalid input. */
    val = (unsigned int)digit;

    while (1) {
        c = getc(in);

        /* Delimiter? */
        if (c == '\t' || c == '\n' || c == '\v' ||
            c == '\f' || c == '\r' || c == ' ' || c == '#') {
            /* Do not consume the character following the value. */
            ungetc(c, in);
            return (int)val;
        }

        /* End of input? */
        if (c == EOF)
            return (int)val;

        /* Is it a decimal digit? */
        digit = decimal_digit(c);
        if (digit < 0)
            return -2; /* Invalid input. */

        /* Reject the digit before val * 10 + digit can exceed the limit. */
        if (val > (limit - (unsigned int)digit) / 10)
            return -3; /* Overflow. */
        val = (val * 10) + (unsigned int)digit;
    }
}
Remember:
P1
and P4
formats have two header fields: width and height, in that order.
P2
, P3
, P5
, and P6
formats have three header fields: width, height, and maxval.
You can use fscanf(handle, "%u", &value)
to read each pixel from P1
and P2
format files, assuming unsigned int value;
. It will return 1 if successful. For P1
, value will be 0 or 1; for P2
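, it will be from 0 to maxval, inclusive. (In P1 files the 0 and 1 digits are not required to be separated by whitespace, so use "%1u" instead of "%u" there to read exactly one digit per pixel.)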
You can use fscanf(handle, "%u %u %u", &red, &green, &blue)
to read each pixel from P3
format files, assuming unsigned int red, green, blue;
. It will return 3 if successful. Then, each component will be from 0 to maxval, inclusive.
P4
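format is the nastiest to read. Each row is packed eight pixels per byte, most significant bit first, and padded to a whole number of bytes. It is best done one row of pixels at once, using fread(buf, (width + 7) / 8, 1, handle)
, with unsigned char buf[(width + 7) / 8];
or a similar-sized dynamically allocated array. Then, pixel x
is !!(buf[x/8] & (0x80 >> (x & 7)))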
(0 being white, 1 being black; with x from 0 to width-1). (The !!
is a double-not, or not-not operator: It yields 0 if the argument is 0, and 1 otherwise.)
For P5
format, if maxval >= 256, then each pixel consists of two bytes. You can use
/*
 * Read one pixel from binary PGM (P5) data and scale it by maxval.
 *
 * For maxval from 256 to 65535 a sample is two bytes, high byte first;
 * for maxval from 1 to 255 it is a single byte.
 *
 * Returns sample / maxval on success, -1.0f if the input ends (or a read
 * fails) before the whole sample has been read, and -2.0f if maxval is
 * outside 1..65535.  No input is consumed when maxval is invalid.
 */
static float p5_gray(FILE *in, int maxval)
{
    if (maxval >= 256 && maxval < 65536) {
        int hi, lo;

        /* Check each byte separately so a failed first read can never
           be folded into the result as -1. */
        hi = fgetc(in);
        if (hi == EOF)
            return -1.0f;
        lo = fgetc(in);
        if (lo == EOF)
            return -1.0f;
        return (float)(hi*256 + lo) / (float)maxval;
    }

    if (maxval >= 1 && maxval < 256) {
        int val;

        val = fgetc(in);
        if (val == EOF)
            return -1.0f;
        return (float)val / (float)maxval;
    }

    return -2.0f;
}
to read each pixel from P5
format. The function returns 0.0f for black and 1.0f for white (in PGM, zero is black and maxval is white), or a negative value if an error occurs.
For P6
format, if maxval >= 256, then each pixel is 6 bytes; otherwise each pixel is three bytes. You can use e.g.
/*
 * Read one pixel from binary PPM (P6) data and scale each component by
 * maxval.
 *
 * For maxval from 256 to 65535 a pixel is six bytes: red, green and blue,
 * each as two bytes with the high byte first.  For maxval from 1 to 255
 * it is three bytes, one per component.
 *
 * red, green and blue receive component / maxval; any of them may be NULL
 * to discard that component.
 *
 * Returns 0 on success, -1 if a whole pixel could not be read, and -2 if
 * maxval is outside 1..65535 (no input is consumed in that case).
 */
static int p6_rgb(FILE *in, int maxval, float *red, float *green, float *blue)
{
    const float max = (float)maxval;
    unsigned char buf[6];

    if (maxval >= 256 && maxval < 65536) {
        if (fread(buf, 6, 1, in) != 1)
            return -1; /* Error! */
        if (red)
            *red = (float)(buf[0]*256 + buf[1]) / max;
        /* Green is bytes 2 and 3; its low byte is buf[3], not buf[1]. */
        if (green)
            *green = (float)(buf[2]*256 + buf[3]) / max;
        if (blue)
            *blue = (float)(buf[4]*256 + buf[5]) / max;
        return 0;
    }

    if (maxval >= 1 && maxval < 256) {
        if (fread(buf, 3, 1, in) != 1)
            return -1; /* Error! */
        if (red)
            *red = (float)buf[0] / max;
        if (green)
            *green = (float)buf[1] / max;
        if (blue)
            *blue = (float)buf[2] / max;
        return 0;
    }

    return -2; /* Invalid maxval */
}
to read each pixel from a P6
format file.
So, if in
is an open file handle (or say stdin
), and you have int format, width, height, maxval;
, you can do
format = pnm_magic(in);
if (format < 1 || format > 6) {
/* Unrecognized format; fail! */
}
width = pnm_value(in);
if (width <= 0) {
/* Invalid width; fail! */
}
height = pnm_value(in);
if (height <= 0) {
/* Invalid height; fail! */
}
if (format == 2 || format == 3 || format == 5 || format == 6) {
maxval = pnm_value(in);
if (maxval < 1 || maxval > 65535) {
/* Invalid maxval; fail! */
}
}
if (pnm_endheader(in)) {
/* Bad end of header; fail! */
}
to parse the header, leaving the file position at the beginning of the pixel data.