I want to parse the filename given on the command line and check its correctness: (1) total length, (2) expected extension, (3) positions of the '_' characters, and the other input values.
The sequence should be as follows:
$check.exe input_file L2A30000_0102051303042026_0001.dat
It should check whether the output file name (L2A30000_0102051303042026_0001.dat) has the expected form (not the exact values, but the type and length of each field).
// Check whether a string consists only of decimal digits.
//
// str: NUL-terminated string to inspect (must not be NULL).
// Returns 1 if every character is a digit (an empty string also yields 1),
// 0 as soon as a non-digit character is found.
int isNumeric(const char *str) {
    for (; *str != '\0'; str++) {
        // <ctype.h> functions require a value representable as unsigned char
        // (or EOF); a plain char may be negative, so convert first.
        if (!isdigit((unsigned char)*str)) {
            return 0; // Not a digit
        }
    }
    return 1; // All characters are digits
}
// Entry point: checks that the second command line argument (the output file
// name) has the expected shape, using a single sscanf() call followed by
// per-field checks. Returns 1 on a usage error or a format mismatch and 0
// when every check passes.
int main(int argc, char *argv[]) {
// NOTE(review): the word "provided" on the line after the next one has
// fallen out of the line comment it belongs to; as written it is a stray
// token in the code.
// Check if the correct number of command line arguments is
provided
if (argc != 3) {
printf("Usage: %s inputfile outputfile\n", argv[0]);
return 1;
}
// Extract the output file name from the command line arguments
const char *outputFileName = argv[2];
// Define the expected format
// One variable per field of the name. None of them is initialised, so any
// field that sscanf() does not assign holds an indeterminate value.
char asciiChar1, numChar1, asciiChar2, numChar2, numChar3[5],
underscore1, numChar4[17], underscore2, numChar5[5],
numChar6[4], extension[4];
// NOTE(review): the format string holds 12 conversion specifications, but
// only 11 pointer arguments follow it. Arguments are consumed in order, so
// "%1[0-9]" (at most one digit) fills numChar5, the first "%3[0-9]" fills
// numChar6, the second "%3[0-9]" fills extension, and "%4[.dat]" has no
// argument left. The literal '_' between the two "%3[0-9]" must match an
// underscore in the input at that position.
// NOTE(review): "%4[.dat]" may store up to 4 characters plus a terminating
// NUL, which needs 5 bytes; extension is declared with 4.
int result = sscanf(outputFileName,
"%c%c%c%c%4[0-9]%c%16[0-9]%c%1[0-9]%3[0-9]_%3[0-9]%4[.dat]",
&asciiChar1, &numChar1, &asciiChar2,
&numChar2, numChar3, &underscore1, numChar4, &underscore2,
numChar5, numChar6, extension);
// Debugging print statement
// All fields are printed regardless of how many sscanf() actually assigned.
printf("Debug: sscanf result: %d\n", result);
printf("Debug: asciiChar1: %c\n", asciiChar1);
printf("Debug: numChar1: %c\n", numChar1);
printf("Debug: asciiChar2: %c\n", asciiChar2);
printf("Debug: numChar2: %c\n", numChar2);
printf("Debug: numChar3: %s\n", numChar3);
printf("Debug: underscore1: %c\n", underscore1);
printf("Debug: numChar4: %s\n", numChar4);
printf("Debug: underscore2: %c\n", underscore2);
printf("Debug: numChar5: %s\n", numChar5);
printf("Debug: numChar6: %s\n", numChar6);
printf("Debug: extension: %s\n", extension);
// Check if the extracted values match the expected format
// NOTE(review): underscore1 and underscore2 are read but never compared
// against '_'.
// NOTE(review): the last line of the condition requires strlen(extension)
// to be 3 and extension to equal ".dat" (4 characters) at the same time;
// both cannot hold, so the error branch is taken for every input.
if (result != 12 || !isalpha(asciiChar1) || !isdigit(numChar1) ||
!isalpha(asciiChar2) || !isdigit(numChar2) ||
strlen(numChar3) != 4 || !isNumeric(numChar3) ||
strlen(numChar4) != 16 || !isNumeric(numChar4) ||
strlen(numChar5) != 4 || !isNumeric(numChar5) ||
strlen(numChar6) != 3 || !isNumeric(numChar6) ||
strlen(extension) != 3 || strcmp(extension, ".dat") != 0) {
printf("Error: Output file format is incorrect.\n");
return 1;
}
// If all checks pass, the output file format is correct
printf("Output file format is correct.\n");
return 0;
}
Command line input:
.\check.exe inputfile L2A30000_0102051303042026_0001.dat
This is the output I am getting:
Debug: sscanf result: 9
...
Debug: numChar5: 0001
Debug: extension:
Error: Output file format is incorrect.
This is the output I am expecting:
Debug: extension:.dat
This part is not working; the other parts are OK. I want to check whether the filename's extension is .dat. If it is not, the program should print an error message and exit.
I suggest you introduce some extra white space in the format string and matching arguments along these lines:
// The format string is split into one piece per field so that each
// conversion can be lined up with the argument that receives it.
int result = sscanf(outputFileName,
"%c%c" // asciiChar1, numChar1
"%c%c" // asciiChar2, numChar2
"%4[0-9]" // numChar3
"%c" // underscore1
"%16[0-9]" // numChar4
"%c" // underscore2
"%1[0-9]" // numChar5 (reads at most one digit)
"%3[0-9]_%3[0-9]%4[.dat]", // three conversions and a literal '_', but only two arguments (numChar6, extension) remain
&asciiChar1, &numChar1,
&asciiChar2, &numChar2,
numChar3,
&underscore1,
numChar4,
&underscore2,
numChar5,
numChar6,
extension
);
So we are good up to the 2nd underscore. Then you expect a single digit (`%1[0-9]`, stored in `char numChar5[5]`), but that width doesn't match the size of the variable. Then 3 more digits (`char numChar6[4]`), which is OK. Then a 3rd underscore, which isn't in the input. Then 3 more digits, which have no matching argument. Finally `"%4[.dat]"`, which can result in a buffer overflow, as the `extension` variable is a `char extension[4]` (4 characters plus the terminating NUL need 5 bytes). Overall there are 12 conversion specifications and 11 arguments, which is undefined behavior.
You can simplify it by hard-coding your fixed strings:
#include <ctype.h>
#include <stdio.h>
#include <string.h>
/*
 * Report whether a string consists only of decimal digits.
 *
 * str: NUL-terminated string to inspect (must not be NULL).
 * Returns 1 if the first non-digit character is the terminating NUL
 * (so an empty string yields 1), 0 otherwise.
 */
int isNumeric(const char *str) {
    /* <ctype.h> functions need a value representable as unsigned char (or
     * EOF); plain char may be negative, so convert before calling isdigit. */
    while (isdigit((unsigned char)*str)) {
        str++;
    }
    return *str == '\0';
}
/*
 * Validate the shape of the output file name passed as argv[2].
 *
 * Expected layout: letter, digit, letter, digit, 4 digits, '_', 16 digits,
 * '_', 4 digits, ".dat", with nothing after the extension.
 *
 * Returns 0 when the name matches, 1 on a usage error or a mismatch.
 */
int main(int argc, char *argv[]) {
    if (argc != 3) {
        printf("Usage: %s inputfile outputfile\n", argv[0]);
        return 1;
    }
    const char *outputFileName = argv[2];

    /* Initialised so the debug output below is well defined even when
     * sscanf stops before assigning every field. */
    char asciiChar1 = '?', numChar1 = '?', asciiChar2 = '?', numChar2 = '?';
    char numChar3[5] = "", numChar4[17] = "", numChar5[5] = "";

    /* sscanf's return value only counts assigned conversions, so a missing
     * or wrong ".dat" after the last field would go unnoticed. "%n" is only
     * reached when the literal ".dat" matched; it records how many
     * characters were consumed and does not add to the return value. */
    int consumed = -1;
    int result = sscanf(outputFileName,
        "%c%c"
        "%c%c"
        "%4[0-9]"
        "_"
        "%16[0-9]"
        "_"
        "%4[0-9]"
        ".dat%n",
        &asciiChar1, &numChar1,
        &asciiChar2, &numChar2,
        numChar3,
        numChar4,
        numChar5,
        &consumed
    );
    printf("Debug: sscanf result: %d\n", result);
    printf("Debug: asciiChar1: %c\n", asciiChar1);
    printf("Debug: numChar1: %c\n", numChar1);
    printf("Debug: asciiChar2: %c\n", asciiChar2);
    printf("Debug: numChar2: %c\n", numChar2);
    printf("Debug: numChar3: %s\n", numChar3);
    printf("Debug: numChar4: %s\n", numChar4);
    printf("Debug: numChar5: %s\n", numChar5);
    if (result != 7 ||
        /* ".dat" must have matched and must end the name. */
        consumed < 0 || outputFileName[consumed] != '\0' ||
        /* Plain char may be negative; <ctype.h> needs unsigned char values. */
        !isalpha((unsigned char)asciiChar1) || !isdigit((unsigned char)numChar1) ||
        !isalpha((unsigned char)asciiChar2) || !isdigit((unsigned char)numChar2) ||
        strlen(numChar3) != 4 || !isNumeric(numChar3) ||
        strlen(numChar4) != 16 || !isNumeric(numChar4) ||
        strlen(numChar5) != 4 || !isNumeric(numChar5)
    ) {
        printf("Error: Output file format is incorrect.\n");
        return 1;
    }
    printf("Output file format is correct.\n");
    return 0;
}
with example run:
./a.out input_file L2A30000_0102051303042026_0001.dat
Debug: sscanf result: 7
Debug: asciiChar1: L
Debug: numChar1: 2
Debug: asciiChar2: A
Debug: numChar2: 3
Debug: numChar3: 0000
Debug: numChar4: 0102051303042026
Debug: numChar5: 0001
Output file format is correct.
Another approach would be to just parse the file name directly, `is_valid_filename()`, possibly via a little interpreter, `is_valid_filename2()`:
#include <ctype.h>
#include <stdio.h>
#include <string.h>
/*
 * Consume one alphabetic character.
 *
 * s: current parse position, or NULL if an earlier step already failed.
 * Returns the position just past the letter, or NULL if s is NULL or the
 * character at s is not alphabetic (this includes the terminating NUL).
 */
const char *alpha(const char *s) {
    if (s == NULL) {
        return NULL;
    }
    /* isalpha needs a value representable as unsigned char (or EOF); plain
     * char may be negative, so convert first. */
    if (!isalpha((unsigned char)*s)) {
        return NULL;
    }
    return s + 1;
}
/*
 * Consume exactly n decimal digits.
 *
 * s: current parse position, or NULL if an earlier step already failed.
 * n: number of digits required.
 * Returns the position just past the n digits, or NULL if s is NULL or any
 * of the first n characters is not a digit. The terminating NUL is not a
 * digit, so the scan never reads past the end of the string.
 */
const char *digits(const char *s, size_t n) {
    if (s == NULL) {
        return NULL;
    }
    for (size_t i = 0; i < n; i++) {
        /* isdigit needs a value representable as unsigned char (or EOF);
         * plain char may be negative, so convert first. */
        if (!isdigit((unsigned char)s[i])) {
            return NULL;
        }
    }
    return s + n;
}
/*
 * Consume the literal text s2 at the current parse position.
 *
 * s:  current parse position, or NULL if an earlier step already failed.
 * s2: NUL-terminated literal that must appear at s.
 * Returns the position just past the literal, or NULL if s is NULL or the
 * text at s does not start with s2.
 */
const char *str(const char *s, const char *s2) {
    if (s == NULL) {
        return NULL;
    }
    const char *cursor = s;
    /* Walk both strings together; a shorter input fails on its NUL. */
    for (const char *want = s2; *want != '\0'; want++, cursor++) {
        if (*cursor != *want) {
            return NULL;
        }
    }
    return cursor;
}
/*
 * Check a file name against the fixed layout
 *   letter, digit, letter, 5 digits, '_', 16 digits, '_', 4 digits, ".dat"
 * with nothing following the extension.
 *
 * Each helper returns NULL on a mismatch and passes NULL through, so the
 * steps can be chained without checking after every call.
 *
 * Returns 1 if the whole name matches, 0 otherwise.
 */
int is_valid_filename(const char *s) {
    const char *p = s;

    p = alpha(p);        /* leading letter */
    p = digits(p, 1);    /* one digit */
    p = alpha(p);        /* second letter */
    p = digits(p, 5);    /* five digits */
    p = str(p, "_");     /* first separator */
    p = digits(p, 16);   /* sixteen digits */
    p = str(p, "_");     /* second separator */
    p = digits(p, 4);    /* four digits */
    p = str(p, ".dat");  /* extension */

    /* Valid only if every step matched and the name ends here. */
    return p != NULL && *p == '\0';
}
/*
 * Table-driven variant of the file name check: the expected layout is
 * described as a list of steps that is interpreted one entry at a time.
 *
 * Returns 1 if every step matches and the name ends right after the last
 * one, 0 otherwise.
 */
int is_valid_filename2(const char *s) {
    enum kind { ALPHA, DIGITS, STR };
    struct step {
        enum kind type;
        union {
            int n;          /* DIGITS: how many digits are required */
            const char *s;  /* STR: literal text that must appear */
        };
    };
    struct step format[] = {
        { ALPHA },
        { DIGITS, .n = 1 },
        { ALPHA },
        { DIGITS, .n = 5 },
        { STR, .s = "_" },
        { DIGITS, .n = 16 },
        { STR, .s = "_" },
        { DIGITS, .n = 4 },
        { STR, .s = ".dat" },
    };
    const struct step *end = format + sizeof format / sizeof *format;

    /* Stop early once a step has failed (s becomes NULL). */
    for (const struct step *step = format; s && step != end; step++) {
        if (step->type == ALPHA) {
            s = alpha(s);
        } else if (step->type == DIGITS) {
            s = digits(s, step->n);
        } else if (step->type == STR) {
            s = str(s, step->s);
        }
    }
    return s && !*s;
}
/*
 * Run both validators on the output file name (argv[2]) and print one
 * verdict line per validator.
 *
 * Returns 1 on a usage error, 0 otherwise.
 */
int main(int argc, char *argv[]) {
    if (argc != 3) {
        printf("Usage: %s inputfile outputfile\n", argv[0]);
        return 1;
    }

    const char *name = argv[2];

    const char *direct = is_valid_filename(name) ? "valid" : "invalid";
    printf("%s\n", direct);

    const char *table_driven = is_valid_filename2(name) ? "valid" : "invalid";
    printf("%s\n", table_driven);

    return 0;
}