I have been working on developing a lex scanner; however, when I feed it my input file, it produces the wrong output. Here is my source code:
%{
/*
 * Lex scanner that tallies lines, characters, integers, keywords and
 * comments, and prints each keyword, identifier and float it matches.
 *
 * Rule notes:
 *  - The two rules anchored at line start (optional leading tabs) deal
 *    with C-style comments: the first enters the exclusive COMMENT
 *    state on a comment opener; the second consumes a whole one-line
 *    comment through its newline and counts it.
 *  - Inside COMMENT the only patterns are a comment closer (with or
 *    without a trailing newline), a newline, and one character followed
 *    by a newline.  NumberOfComments goes up once per newline consumed
 *    by these rules, and not at all for a closer without a newline.
 *  - The newline and any-character rules each add 1 and then
 *    strlen(yytext) to NumberOfChar, i.e. 2 per character matched.
 *  - lex takes the longest match and, on a tie, the rule listed first.
 *    The any-character rule precedes {DIGIT} and {ID}, so a
 *    one-character digit or identifier is taken by the any-character
 *    rule rather than by those rules.
 *
 * NOTE(review): DIGIT is defined as [0-9]*, so it can also match the
 * empty string; ID only admits lower-case letters and digits.
 * NOTE(review): the actions call strlen() and atof() but only <stdio.h>
 * is included here -- presumably the generated scanner supplies
 * <string.h> and <stdlib.h>; confirm for the lex in use.
 */
#include <stdio.h>
int NumberOfLines=0;      /* bumped by the plain newline rule */
int NumberOfChar=0;       /* character tally; see the rule notes above */
int NumberOfIntegers=0;   /* bumped once per {DIGIT} match */
int KWCount=0;            /* bumped once per if/else/while/return match */
int NumberOfComments=0;   /* bumped by the C-style comment rules */
%}
DIGIT [0-9]*
ID [a-z][a-z0-9]*
%x COMMENT
%option noyywrap
%%
^[\t]*"/*" {BEGIN COMMENT;}
^[\t]*"/*".*"*/"[\t]*\n {NumberOfComments++;}
<COMMENT>"*/"[\t]*\n {BEGIN 0; NumberOfComments++;}
<COMMENT>"*/" {BEGIN 0;}
<COMMENT>\n {NumberOfComments++;}
<COMMENT>.\n {NumberOfComments++;}
\n {NumberOfLines++, NumberOfChar++; NumberOfChar +=strlen(yytext);}
. {NumberOfChar++; NumberOfChar +=strlen(yytext);}
{DIGIT} {NumberOfIntegers++; NumberOfChar +=strlen(yytext); }
{DIGIT}+"."{DIGIT}* {
/* float: digits, a dot, then optional digits; print text and value */
printf("A flot: %s (%g) \n", yytext, atof(yytext));
NumberOfChar +=strlen(yytext);
}
if|else|while|return {
/* keyword: print it and count it */
printf("A keyword: %s\n", yytext); KWCount++;
NumberOfChar +=strlen(yytext);
}
{ID} {
/* identifier: printed but not counted separately */
printf("An identifier: %s\n", yytext);
NumberOfChar +=strlen(yytext);
}
"{"[^}\n]*"}" {
/* eat up one-line brace comments; only their length is tallied */
NumberOfChar +=strlen(yytext);
}
%%
/*
 * Entry point: scan the file named by the first command-line argument,
 * or standard input when no argument is given, then print the counters
 * collected by the scanner.
 *
 * Returns 0 on success, 1 if the named input file cannot be opened.
 */
int main(int argc, char **argv){
    if (argc > 1) {
        yyin = fopen(argv[1], "r");
        if (yyin == NULL) {
            /* fopen returns NULL on failure; report it instead of
               handing yylex() a null input stream. */
            perror(argv[1]);
            return 1;
        }
    } else {
        yyin = stdin;
    }

    yylex();

    printf("Character count: %d\n", NumberOfChar);
    printf("Number count: %d\n", NumberOfIntegers);
    printf("Keyword count: %d\n", KWCount);
    printf("Line count: %d\n", NumberOfLines);
    printf("Comment count: %d\n", NumberOfComments);
    return 0;
}
Whenever I run the scanner on my input file, it gives me the wrong output. For instance, the output for the file should be:
Output:
Number of Keywords: 3
Number of Characters: 196
Number of Lines: 17
Number of Digits: 3
However the output it is currently producing is:
Output:
Number of keywords: 0
Number of Characters: 3
Number of Lines: 7
Number of Digits: 0
I suspect it has to do with my regular expressions. Any help would be appreciated, as I am still learning regex!
Here is my input file contents:
/*comment 1*/
/*comment
comment 2
*/
/*comment 3*
*/if this is a line
{comment 4}
int i = 789;
int j = 689;
if i == 172 then
{comment 5}
else
{comment 6}
{comment 7}
/*8 comments
*
*/
end
Here's some mostly working code, closely based on your code.
%{
/*
 * Lex scanner that tallies lines, characters, integers, keywords,
 * identifiers and floats, printing each token it recognises.
 *
 * Rule notes:
 *  - No rule here recognises C-style comments, so their characters are
 *    handled by whichever of the rules below matches them.
 *  - Every rule adds the length of its match to RCCount, which makes
 *    RCCount the total number of characters matched by the rules.
 *  - NumberOfChar is incremented only by the newline and any-character
 *    rules.
 *  - lex takes the longest match and, on a tie, the rule listed first.
 *    The any-character rule precedes {DIGIT} and {ID}, so a
 *    one-character digit or identifier is taken by the any-character
 *    rule rather than by those rules.
 *
 * NOTE(review): DIGIT is defined as [0-9]*, so it can also match the
 * empty string; ID only admits lower-case letters and digits.
 * NOTE(review): the actions call strlen() and atof() but only <stdio.h>
 * is included here -- presumably the generated scanner supplies
 * <string.h> and <stdlib.h>; confirm for the lex in use.
 */
#include <stdio.h>
int NumberOfLines=0;      /* bumped by the newline rule */
int NumberOfChar=0;       /* bumped by the newline and any-character rules */
int NumberOfIntegers=0;   /* bumped once per {DIGIT} match */
int KWCount = 0;          /* bumped once per if/else/while/return match */
int IDCount = 0;          /* bumped once per {ID} match */
int RCCount = 0;          /* raw count: total length of all matched text */
int OCCount = 0;          /* total length of one-line brace comments */
int DTCount = 0;          /* bumped once per any-character ('.') match */
int FLCount = 0;          /* bumped once per float match */
%}
%option noyywrap
%option noinput
%option nounput
DIGIT [0-9]*
ID [a-z][a-z0-9]*
%%
\n {NumberOfLines++; NumberOfChar++; RCCount += strlen(yytext); }
. {NumberOfChar++; DTCount++; RCCount++; printf(" '%c'", yytext[0]); }
{DIGIT} {NumberOfIntegers++; RCCount += strlen(yytext); }
{DIGIT}+"."{DIGIT}* {
/* float: digits, a dot, then optional digits; print text and value */
printf("\nA float: %s (%g) \n", yytext, atof(yytext));
RCCount += strlen(yytext);
FLCount++;
}
if|else|while|return {
/* keyword: print it and count it */
printf("\nA keyword: %s\n", yytext);
KWCount++;
RCCount += strlen(yytext);
}
{ID} {
/* identifier: print it and count it */
printf("\nAn identifier: %s\n", yytext);
IDCount++;
RCCount += strlen(yytext);
}
"{"[^}\n]*"}" {
/* one-line brace comment: not printed, only its length is tallied */
RCCount += strlen(yytext);
OCCount += strlen(yytext);
}
%%
/*
 * Entry point: scan the file named by the first command-line argument,
 * or standard input when no argument is given, then print every counter
 * collected by the scanner.
 *
 * Returns 0 on success, 1 if the named input file cannot be opened.
 */
int main(int argc, char **argv){
    if (argc > 1) {
        yyin = fopen(argv[1], "r");
        if (yyin == NULL) {
            /* fopen returns NULL on failure; report it instead of
               handing yylex() a null input stream. */
            perror(argv[1]);
            return 1;
        }
    } else {
        yyin = stdin;
    }

    yylex();

    printf("Character count: %d\n", NumberOfChar);
    printf("Number count: %d\n", NumberOfIntegers);
    printf("Keyword count: %d\n", KWCount);
    printf("Line count: %d\n", NumberOfLines);
    printf("ID count: %d\n", IDCount);
    printf("Dot count: %d\n", DTCount);
    printf("Raw count: %d\n", RCCount);
    printf("Float count: %d\n", FLCount);
    printf("Other count: %d\n", OCCount);
    printf("\n");
    return 0;
}
When run on the data file:
/*commEnt 1*/
/*COMMENT
commEnt 2
*/
/*commEnt 3*
*/if this is a linE
{commEnt 4}
int i = 789;
int j = 689;
if i == 172 thEn
{commEnt 5}
ElsE
{commEnt 6}
{commEnt 7}
float 12.34
/*8 commEnts
*
else
return
while
the
going
is
good
*/
end
I get the output:
'/' '*'
An identifier: comm
'E'
An identifier: nt
' ' '1' '*' '/' '/' '*' 'C' 'O' 'M' 'M' 'E' 'N' 'T' ' ' ' '
An identifier: comm
'E'
An identifier: nt
' ' '2' ' ' ' ' ' ' '*' '/' ' ' ' ' '/' '*'
An identifier: comm
'E'
An identifier: nt
' ' '3' '*' ' ' ' ' ' ' '*' '/'
A keyword: if
' '
An identifier: this
' '
An identifier: is
' ' 'a' ' '
An identifier: lin
'E'
An identifier: int
' ' 'i' ' ' '=' ' ' ';' ' '
An identifier: int
' ' 'j' ' ' '=' ' ' ';'
A keyword: if
' ' 'i' ' ' '=' '=' ' ' ' '
An identifier: th
'E' 'n' ' ' 'E'
An identifier: ls
'E'
An identifier: float
' '
A float: 12.34 (12.34)
'/' '*' '8' ' '
An identifier: comm
'E'
An identifier: nts
' ' '*' ' '
A keyword: else
' '
A keyword: return
' '
A keyword: while
' '
An identifier: the
' '
An identifier: going
' '
An identifier: is
' '
An identifier: good
' ' '*' '/' ' '
An identifier: end
Character count: 115
Number count: 3
Keyword count: 5
Line count: 26
ID count: 21
Dot count: 89
Raw count: 258
Float count: 1
Other count: 44
The output from `wc` is:
$ wc data.2
26 49 258 data.2
$
The 'raw count' of characters matches the character count from `wc`; the line count matches too. The numbers of integers, floats, keywords and identifiers all look correct, given that upper-case letters are counted in the 'dot characters'. You can work out whether there are other problems; I think that the count of integers is wrong, but I'm not sure why.