I am new to flex and am working on a lexical analyser for the C language. I want to output all keywords, identifiers, literals, operators and separators. This is my program, lexer.l, which does not work:
%{
/* C prologue for the scanner: declarations used by the rule actions below. */
#include<stdio.h>
/* Number of the input line currently being scanned. Starts at 1 and is
   incremented by the newline rule; every token report prints it. */
int currentLine=1;
%}
%%
    /* Rules section: each matched token is reported on its own line as
       <currentLine> TAB <lexeme,Category>.

       How flex chooses a rule: the rule giving the LONGEST match wins; when
       several rules match the same number of characters, the rule listed
       FIRST wins. Rule order therefore only breaks ties.

       NOTE(review): there is no rule for identifiers or numeric literals,
       and none for the operators <, >, >=, == or !=. Such input can only be
       consumed by the catch-all rule at the bottom of this section. */
    /* Preprocessor lines. The include rule is greedy up to the last '>' on
       the line; the define rule consumes the rest of the line. Both print a
       fixed word instead of the matched text. */
#include<.*> printf("%d\t<%s,%s>\n",currentLine,"include","PreProcessor");
#define[^\n]+ printf("%d\t<%s,%s>\n",currentLine,"define","PreProcessor");
= {printf("%d\t<%s,%s>\n",currentLine,yytext,"AssignmentOperator");}
int|short|signed|unsigned|long|double|float|char|void|enum|union|struct|auto|const|register|static|volatile|extern|typedef|if|else|while|do|for|switch|case|continue|break|default|sizeof|goto|return {printf("%d\t<%s,%s>\n",currentLine,yytext,"Keyword");}
    /* Blanks and tabs are discarded; a newline only advances the counter. */
[\t ] ;
\n currentLine++;
    /* String literal: opening quote, any run of non-quote characters
       (newlines included, without updating currentLine), closing quote. */
(\"[^\"]*\") {printf("%d\t<%s,%s>\n",currentLine,yytext,"String Literal");}
\( printf("%d\t<%s,%s>\n",currentLine,yytext,"parenthesisOpen");
\) printf("%d\t<%s,%s>\n",currentLine,yytext,"parenthesisClose");
\{ printf("%d\t<%s,%s>\n",currentLine,yytext,"blockOpen");
\} printf("%d\t<%s,%s>\n",currentLine,yytext,"blockClose");
    /* NOTE(review): "<=" is a relational operator but is reported here as
       ArithmeticOperator. */
"+"|"-"|"/"|"*"|"<=" {printf("%d\t<%s,%s>\n",currentLine,yytext,"ArithmeticOperator");}
(\&\&)|(\|\|)|! printf("%d\t<%s,%s>\n",currentLine,yytext,"LogicalOperator");
&|\||~ printf("%d\t<%s,%s>\n",currentLine,yytext,"BitwiseOperator");
    /* NOTE(review): the single-line comment pattern matches the two slashes
       plus exactly ONE following character; a repetition operator on the
       final character class was presumably intended. The multi-line comment
       pattern uses '.', which never matches a newline, so it can only match
       a comment that opens and closes on the same line. */
\/\/[^\n] printf("%d\t<%s,%s>\n",currentLine,yytext,"SingleLineComment");
(\/\*.*\*\/) printf("%d\t<%s,%s>\n",currentLine,yytext,"MultiLineComment");
; printf("%d\t<%s,%s>\n",currentLine,yytext,"Separator");
    /* Catch-all. NOTE(review): because this pattern extends to the end of
       the line, it is the longest match at almost every position and so
       overrides every rule above, unless that rule's own match also reaches
       the end of the line (then the tie goes to the earlier rule). A
       one-character catch-all (a single dot) would not have this effect. */
.* printf("%s\tany match\n",yytext);
%%
/* End-of-input hook for the scanner. Always reports 1, i.e. there is no
   further input to continue with once the current file is exhausted. */
int yywrap()
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}
/* Entry point of the lexer.l driver.
   Expects exactly one command-line argument: the path of the file to scan.
   Returns 1 on wrong usage, 2 if the file cannot be opened, and 0 after the
   whole file has been run through yylex(). */
int main(int argc, char *argv[])
{
    const char *path;

    if (argc != 2) {
        fputs("Invalid arguments !\n Usage: lexgen <filename>\n", stdout);
        return 1;
    }

    path = argv[1];
    yyin = fopen(path, "r");
    if (!yyin) {
        fputs("File not found !\n", stdout);
        return 2;
    }

    /* Header lines printed before the token listing. */
    fputs("Lexical Analyser for C :-\n", stdout);
    fputs("Line\tToken\n", stdout);

    yylex();

    fclose(yyin);
    return 0;
}
input file:
#include<stdio.h>
#define PI 3.14
int a=5;
double
< + - *
<= >= ! ~
"hskldjh";
This is another program, tmp.l, which does work (it tokenizes `int a=5;` correctly, whereas lexer.l just ignores it):
%{
#include<stdio.h>
#include<string.h>
/* err:  messages of the lexical errors recorded so far (20 slots of 50 chars).
   name: symbol table of distinct identifiers (20 slots of 20 chars). */
char err[20][50],name[20][20];
/* lno:  current input line, starting at 1.
   cnt:  number of used entries in name.
   ecnt: number of used entries in err and elno.
   elno: line number on which the corresponding err entry was recorded. */
int lno=1,cnt=0,ecnt=0,elno[20];
/* Adds an identifier to the symbol table unless it is already present. */
void st_add(char *);
%}
%%
    /* Rules section: each recognised token is printed as
       "<line> <lexeme> <category>". Lexical errors are not printed here;
       they are stored in err/elno and reported by main() afterwards.

       flex picks the rule with the longest match and, on a tie, the rule
       listed first. Characters that no rule matches fall through to flex's
       default rule, which copies them to the output unchanged.

       NOTE(review): none of the error-recording actions checks ecnt against
       the 20-entry capacity of err and elno. */
[0-9]+ {printf("%d %s Number\n",lno,yytext);}
    /* NOTE(review): inside the brackets, plus-dash-slash forms a character
       RANGE (from '+' to '/'), which also contains ',' and '.'; those two
       characters are therefore reported as Operator as well. */
[+-/*] {printf("%d %s Operator\n",lno,yytext);}
= {printf("%d %s Assignment\n",lno,yytext);}
    /* Keyword and data-type rules must stay above the identifier rule: for
       a word such as "int" both match the same length, and the earlier rule
       wins the tie. */
main|return|include|if|else|switch|cin|cout|using|namespace|std {printf("%d %s Keyword\n",lno,yytext);}
int|double|char|float {printf("%d %s Data type\n",lno,yytext);}
[\t ] ;
\n {lno++;}
    /* Comments are discarded. The block-comment body may contain neither a
       star nor a slash; it may contain newlines, which are not counted in
       lno. An opener with no terminator is recorded as an error. */
(\/\/.*) ;
(\/\*[^*/]*\*\/) ;
(\/\*[^*/]*) {elno[ecnt]=lno;char str[100]="Unterminated comment";strcpy(err[ecnt],str);ecnt++;}
printf|scanf {printf("%d %s Library function\n",lno,yytext);}
    /* Identifiers must start with a lower-case letter; each one is also
       entered into the symbol table. */
[a-z]+[a-zA-Z0-9]* {printf("%d %s Identifier\n",lno,yytext);st_add(yytext);}
    /* Header names such as stdio.h: longer than the identifier match on the
       same text, so this rule wins despite being listed later. */
([a-zA-Z0-9]+\.h) {printf("%d %s Header\n",lno,yytext);}
\( {printf("%d %s Open bracket\n",lno,yytext);}
\) {printf("%d %s Close bracket\n",lno,yytext);}
    /* Only the doubled forms are recognised; there is no rule for a single
       '<' or '>'. */
\<< {printf("%d %s insertion\n",lno,yytext);}
\>> {printf("%d %s extraction\n",lno,yytext);}
\{ {printf("%d %s Block start\n",lno,yytext);}
\} {printf("%d %s Block end\n",lno,yytext);}
# {printf("%d %s Preprocessor\n",lno,yytext);}
; {printf("%d %s Terminator\n",lno,yytext);}
    /* A string literal may span lines (newlines inside it are not counted
       in lno). The second rule records an opening quote that reaches the
       end of the line without a closing quote; it bumps lno itself because
       its pattern consumes the newline. */
(\"[^\"]*\") {printf("%d %s String literal\n",lno,yytext);}
(\"[^\"\n]*\n) {elno[ecnt]=lno;char str[100]="Unterminated quote";strcpy(err[ecnt],str);ecnt++;lno++;}
    /* Digits followed by letters are recorded as an error. Pure digit runs
       tie with the Number rule at the top, which wins by position.
       NOTE(review): the range A-z (lower-case z) also covers the ASCII
       punctuation between 'Z' and 'a'; A-Z was presumably intended. */
[0-9]+[a-zA-z]* {elno[ecnt]=lno;char str[100]="Unrecognized token";strcpy(err[ecnt],str);ecnt++;}
%%
/*
 * Add identifier s to the symbol table `name` unless it is already there.
 *
 * s is the NUL-terminated lexeme (the scanner passes yytext, which can be
 * arbitrarily long). Each table slot holds a fixed number of characters, so
 * a longer identifier is stored truncated to the slot width; lookups compare
 * only that many characters, so the same long identifier is not inserted
 * twice. When the table is full the identifier is silently dropped instead
 * of writing past the end of the table.
 */
void st_add(char s[20])
{
    const int capacity = (int)(sizeof name / sizeof name[0]);
    const size_t width = sizeof name[0];
    int i;

    for (i = 0; i < cnt; i++) {
        /* Compare at most width-1 characters: that is all a slot can hold. */
        if (strncmp(name[i], s, width - 1) == 0) {
            return;
        }
    }

    if (cnt >= capacity) {
        return;
    }

    /* snprintf always NUL-terminates and never writes more than width bytes. */
    snprintf(name[cnt], width, "%s", s);
    cnt++;
}
/*
 * Entry point of the tmp.l driver.
 *
 * Prompts for a file name, scans that file with yylex() (which prints one
 * line per token), then prints the recorded lexical errors and the symbol
 * table of identifiers.
 *
 * Returns 0 on success and 1 if no file name could be read or the file
 * cannot be opened.
 */
int main(void)
{
    char file[20];
    int i;

    printf("Enter file name:");
    /* Field width keeps the read inside file[], leaving room for the NUL. */
    if (scanf("%19s", file) != 1) {
        fprintf(stderr, "No file name given\n");
        return 1;
    }

    yyin = fopen(file, "r");
    if (yyin == NULL) {
        /* Without this check a failed open would hand yylex() a null
           stream instead of the requested file. */
        fprintf(stderr, "Cannot open file: %s\n", file);
        return 1;
    }

    printf("Line No. Lexeme Token\n");
    yylex();
    fclose(yyin);

    printf("Number of errors: %d\n", ecnt);
    for (i = 0; i < ecnt; i++) {
        printf("Line no.: %2d %s\n", elno[i], err[i]);
    }

    printf("\nSymbol Table\n");
    for (i = 0; i < cnt; i++) {
        printf("%s\n", name[i]);
    }

    return 0;
}
/* End-of-input hook for the scanner: returning 1 signals that no further
   input follows the current file. */
int yywrap()
{
    const int done = 1;

    return done;
}
tmp.l output:
Line No. Lexeme Token
1 # Preprocessor
1 include Keyword
<1 stdio.h Header
>2 # Preprocessor
2 define Identifier
PI2 3 Number
2 . Operator
2 14 Number
3 int Data type
3 a Identifier
3 = Assignment
3 5 Number
3 ; Terminator
4 double Data type
<5 + Operator
5 - Operator
5 * Operator
<6 = Assignment
>6 = Assignment
!~7 "hskldjh" String literal
7 ; Terminator
Number of errors: 0
Symbol Table
define
a
lexer.l output:
Lexical Analyser for C :-
Line Token
1 <include,PreProcessor>
2 <define,PreProcessor>
int a=5; any match
4 <double,Keyword>
< + - * any match
<= >= ! ~ any match
"hskldjh"; any match
lexer.l doesn't even match the regexes for the relational and arithmetic operators. If the input is just `int`, it displays `<int,Keyword>`, but if the input is `int a=5;` the line is ignored (it is only reported as "any match"), whereas tmp.l handles it perfectly well. Is there a specific order in which I should write the rules in flex?
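(F)lex always uses the rule which produces the longest match. The rule `.*`
matches input up to the end of the line, which will be a longer match than that of any other rule unless the token itself extends to the end of the line. (When two rules match the same number of characters, the rule listed first wins, which is why `double` alone on a line is still reported as a keyword.) Rule order therefore only matters for ties. To get a catch-all that does not swallow other tokens, make the last rule match a single character: `.` instead of `.*`.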