I'm trying to write a low-level parser using SLY and Python. However, when I run my code, I get this error:
syntax error: "1", at line 1
The code in question is:
#!/bin/python3
from sly import Lexer, Parser
# Tokenizer for the low-level language: one keyword token per instruction
# (DAT ... NOP) plus NAME, STRING and NUMBER.
class LowLexer(Lexer):
# Names of every token type this lexer can produce.
tokens = {DAT, MOV, ADD, SUB, MUL,
DIV, MOD, JMP, JMZ, JMN,
DJN, SEQ, SNE, CMP, SLT,
NOP, NAME, STRING, NUMBER}
# Tabs and spaces between tokens are skipped.
ignore = '\t '
# Single characters that are passed through as tokens of their own.
literals = { '=', ',', ';'}
# Define tokens as regular expressions
# (stored as raw strings)
# NOTE(review): NAME is defined before the instruction keywords and its
# pattern also matches them, so input such as "MOV" is currently lexed as
# NAME rather than MOV.
NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
# Double-quoted string, non-greedy so it stops at the first closing quote.
STRING = r'\".*?\"'
# Instruction keywords, matched literally (upper case only).
DAT = r'DAT'
MOV = r'MOV'
ADD = r'ADD'
SUB = r'SUB'
MUL = r'MUL'
DIV = r'DIV'
MOD = r'MOD'
JMP = r'JMP'
JMZ = r'JMZ'
JMN = r'JMN'
DJN = r'DJN'
SEQ = r'SEQ'
SNE = r'SNE'
CMP = r'CMP'
SLT = r'SLT'
NOP = r'NOP'
# A newline only advances the line counter; nothing is returned, so no
# token is produced for it.
@_(r'\n')
def line_count(self, t):
self.lineno += 1
# Text enclosed in square brackets (without nested brackets) is discarded as
# a comment; newlines inside it still advance the line counter.
@_(r'\[[^\[\]]*\]')
def comment(self, t):
self.lineno += t.value.count("\n")
# A run of decimal digits; the token value is converted from str to int.
@_(r'\d+')
def NUMBER(self, t):
t.value = int(t.value)
return t
# Called on a character no pattern matches: report it with the current line
# number and terminate with exit status 1.
def error(self, t):
print("error at line {}: bad character - {}".format(self.lineno, t.value[0]))
exit(1)
# Parser that turns the LowLexer token stream into tuples such as
# ('var', name) or ('num', value).
class LowParser(Parser):
#tokens are passed from lexer to parser
tokens = LowLexer.tokens
# Every instruction keyword is declared left-associative, in four groups.
# NOTE(review): none of the grammar rules below use these tokens as
# operators, so it is unclear whether this table is needed -- confirm.
precedence = (
('left', DAT, MOV, ADD, SUB, MUL),
('left', DIV, MOD, JMP, JMZ, JMN),
('left', DJN, SEQ, SNE, CMP, SLT),
('left', NOP)
)
# env starts as an empty dict; nothing in this class reads it yet.
def __init__(self):
self.env = { }
# Syntax-error hook: print the offending token's value and line, then
# terminate with exit status 1.
# NOTE(review): SLY may pass None here when the error is at end of input,
# in which case t.value would fail -- confirm.
def error(self, t):
print("syntax error: \"{}\", at line {}".format(t.value, t.lineno))
exit(1)
# statement -> (empty): accepts empty input and yields None.
@_('')
def statement(self, p):
print("in here statement")
pass
# statement -> expr: yields the value built for the expression.
@_('expr')
def statement(self, p):
print("in here expr")
return (p.expr)
# expr -> MOV NUMBER NUMBER
# NOTE(review): this rule has no symbol named expr (only MOV and two
# NUMBERs), so p.expr0 / p.expr1 do not exist here.
@_('MOV NUMBER NUMBER')
def expr(self, p):
print("in here mov")
return ('MOV', p.expr0, p.expr1)
# expr -> NAME: yields ('var', <name text>).
@_('NAME')
def expr(self, p):
return ('var', p.NAME)
# expr -> NUMBER: yields ('num', <int value>).
@_('NUMBER')
def expr(self, p):
print("in here number")
return ('num', p.NUMBER)
# Build a lexer and a parser and run them over one hard-coded input line.
def main():
lexer = LowLexer()
parser = LowParser()
expression = "MOV 1 2"
# The parse result is stored in tree but not printed or returned.
tree = parser.parse(lexer.tokenize(expression))
# Run main() only when this file is executed as a script.
if __name__=="__main__":
main()
It seemed pretty straightforward to me. From my understanding, you are supposed to first define the tokens, which I did, and then parse them appropriately, which I think I'm not doing correctly. Does anyone know how I might go about doing this properly?
The immediate issue is with your tokenizer: it is currently reading your "MOV" as a NAME, because the NAME pattern is defined before the instruction keywords and also matches them. Moving your definition of NAME below your instruction constants gets it past that error. Then you need to fix an issue with your "MOV NUMBER NUMBER" rule for expr:
# Failing version: the rule contains MOV and two NUMBERs but no expr symbol,
# so p.expr0 and p.expr1 cannot be looked up.
@_('MOV NUMBER NUMBER')
def expr(self, p):
print("in here mov")
return ('MOV', p.expr0, p.expr1)
will not work because that rule has no symbols named expr (it contains only MOV and two NUMBERs), so p.expr0 and p.expr1 do not exist. Instead you want this:
# Corrected version: the two NUMBER symbols of the rule are read as
# p.NUMBER0 (first) and p.NUMBER1 (second).
@_('MOV NUMBER NUMBER')
def expr(self, p):
print("in here mov")
return ('MOV', p.NUMBER0, p.NUMBER1)