I am trying to process input file with description of algorithm behavior. I am using python's PLY module for defining lexer and parser. I stumbled upon problem of defining grammar which will enforce user to correctly write this file.
File
# Beginning of the first section
STATES = INITIATOR, IDLE, DONE;
INIT = INITIATOR, IDLE;
TERM = DONE;
# End of first section
# Beginning of the second section
INITIATOR
RANDOM
begin
SEND(x, NEIGHBORS);
BECOME(DONE);
end
IDLE
RECEIVE(x)
begin
SEND(x, NEIGHBORS);
BECOME(DONE);
end
# End of second section
Lexer
import ply.lex as lex
from soda.helpers import prepare_file
class Lexer(object):
keywords = (
'INIT', 'TERM', 'STATES', 'REGISTERS',
'begin', 'end',
'SEND', 'BECOME'
)
tokens = keywords + (
'NAME', 'EQUALS', 'COMMA', 'SEMICOLON',
'LPAREN', 'RPAREN'
)
# Tokens
t_EQUALS = r'='
t_COMMA = r','
t_SEMICOLON = r';'
t_STATES = r'STATES'
t_REGISTERS = r'REGISTERS'
t_INIT = r'INIT'
t_TERM = r'TERM'
t_begin = r'begin'
t_end = r'end'
t_SEND = r'SEND'
t_BECOME = r'BECOME'
t_LPAREN = r'\('
t_RPAREN = r'\)'
# Ignored characters
t_ignore = ' \t\n'
def t_NAME(self, t):
r'[a-zA-Z][a-zA-Z]*'
if t.value in self.keywords: # is this a keyword?
t.type = t.value
return t
def t_error(self, t):
print ("Illegal character {0} at line {1}".format(t.value[0], t.lineno))
t.lexer.skip(1)
def build(self, **kwargs):
self._lexer = lex.lex(module=self, **kwargs)
@prepare_file
def lexical_analysis(self, file):
print ("Started lexical analysis...")
for line in file:
try:
lex_input = line
except EOFError:
break
self._lexer.input(lex_input)
while True:
token = self._lexer.token()
if not token:
break
print (" ", token)
Parser
import ply.yacc as yacc
from soda.helpers import prepare_file
class Parser(object):
def p_algorithm(self, p):
''' algorithm : first_section second_section'''
def p_first_section(self, p):
''' first_section : STATES EQUALS states_list SEMICOLON
| REGISTERS EQUALS register_list SEMICOLON
| INIT EQUALS init_list SEMICOLON
| TERM EQUALS term_list SEMICOLON'''
def p_states_list(self, p):
''' states_list : state_term
| states_list COMMA state_term'''
def p_state_term(self, p):
''' state_term : NAME'''
self.behavior.states.append(p[1])
def p_register_list(self, p):
''' register_list : register_term
| register_list COMMA register_term'''
def p_register_term(self, p):
''' register_term : NAME'''
self.behavior.registers.append(p[1])
def p_init_list(self, p):
''' init_list : init_term
| init_list COMMA init_term'''
def p_init_term(self, p):
''' init_term : NAME'''
self.behavior.init_states.append(p[1])
def p_term_list(self, p):
''' term_list : term_term
| term_list COMMA term_term'''
def p_term_term(self, p):
''' term_term : NAME'''
self.behavior.term_states.append(p[1])
def p_second_section(self, p):
''' second_section : NAME begin commands end'''
def p_error(self, p):
print("Syntax error in input! -> {}".format(p))
def build(self, lexer, behavior):
self.lexer = lexer
self.behavior = behavior
self.tokens = lexer.tokens
self._parser = yacc.yacc(module=self)
@prepare_file
def parsing(self, file):
for line in file:
try:
parser_input = line
print (line)
except EOFError:
break
self._parser.parse(parser_input, lexer=self.lexer._lexer)
Parsing results in syntax error and I am not sure how to define rules to enforce the consistency of file with algorithm behavior. first_section is parsed ok and problem is second_section. My solution defines that algorithm : first_section second_section and it is not working. I tried to define it like algorithm: first_section | second_section and it works good but this rule states that first and second section can be switched in file.
So my question is how to enforce it with rules so user will keep the input file consistent.
Error output
enter STATES = INITIATOR, IDLE, DONE;
Syntax error in input! -> None
INIT = INITIATOR, IDLE;
Syntax error in input! -> None
TERM = DONE;
Syntax error in input! -> None
INITIATOR
Syntax error in input! -> LexToken(NAME,'INITIATOR',1,0)
begin
Syntax error in input! -> LexToken(begin,'begin',1,0)
Program just states there is error in syntax. Problem is not with lexical analysis but with defined grammar. I can define it in such way that input is accepted but for example user would be able to switch first_section
with second_section
.
Edit
I think it is not clear from this question what I want to achieve or my problem so I voted to close it. I came up with idea how to better state what I am looking for so I want to raise new question.
Oups! Your grammar parses the file line by line, which is at least uncommon and does not allow to control the ordering of lines. IMHO, you should parse the file as a whole. The trick is to pass the parser a tokenfunc
function that will feed the lexer with one line at a time, and declare each section to be composed of lines:
class Parser(object):
def p_algorithm(self, p):
''' algorithm : first_section second_section'''
def p_first_section(self, p):
''' first_section : first_section_line
| first_section_line first_section'''
def p_first_section_line(self, p):
''' first_section_line : STATES EQUALS states_list SEMICOLON
| REGISTERS EQUALS register_list SEMICOLON
| INIT EQUALS init_list SEMICOLON
| TERM EQUALS term_list SEMICOLON'''
...
# same for second section...
@prepare_file
def parsing(self, file):
def get_token():
'a tokenizer that automatically feeds the lexer with the next line'
while True:
tok = self.lexer._lexer.token()
if tok is not None: return tok
try:
line = next(file)
self.lexer._lexer.input(line)
except StopIteration:
return None
self._parser.parse("", lexer=self.lexer._lexer, tokenfunc = get_token)