Search code examples
python assembly sly

Trying to write a parser for an assembly-level language with sly


I'm trying to write a parser for a low-level (assembly-like) language using sly and Python. However, when I run my code I get this error:

syntax error: "1", at line 1

The code in question is:

#!/bin/python3

from sly import Lexer, Parser

class LowLexer(Lexer):
    # Lexer for the assembly-like language: produces instruction mnemonic
    # tokens, NAME, STRING and NUMBER, plus the literals '=', ',' and ';'.

    # Every token type this lexer can emit.
    tokens = {DAT, MOV, ADD, SUB, MUL, 
              DIV, MOD, JMP, JMZ, JMN, 
              DJN, SEQ, SNE, CMP, SLT, 
              NOP, NAME, STRING, NUMBER}
    # Tabs and spaces between tokens are skipped.
    ignore = '\t '
    # Single characters that are passed through as tokens of their own.
    literals = { '=', ',', ';'}

    # Token patterns, written as raw-string regular expressions.
    #
    # NOTE(review): NAME is defined before the mnemonic patterns, and a
    # mnemonic such as 'MOV' also matches the NAME regex. The input
    # "MOV 1 2" is therefore lexed as NAME NUMBER NUMBER, which is what
    # leads to the reported syntax error on "1".
    NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
    # Double-quoted string, non-greedy so it stops at the first closing quote.
    STRING = r'\".*?\"'
    # Instruction mnemonics (upper-case only).
    DAT = r'DAT'
    MOV = r'MOV'
    ADD = r'ADD'
    SUB = r'SUB'
    MUL = r'MUL'
    DIV = r'DIV'
    MOD = r'MOD'
    JMP = r'JMP'
    JMZ = r'JMZ'
    JMN = r'JMN'
    DJN = r'DJN'
    SEQ = r'SEQ'
    SNE = r'SNE'
    CMP = r'CMP'
    SLT = r'SLT'
    NOP = r'NOP'

    # A newline yields no token (nothing is returned); it only advances the
    # line counter used in error messages.
    @_(r'\n')
    def line_count(self, t):
        self.lineno += 1

    # A comment is text enclosed in [ ... ] with no brackets inside it. It is
    # discarded, but any newlines it contains are still counted.
    @_(r'\[[^\[\]]*\]')
    def comment(self, t):
        self.lineno += t.value.count("\n")

    # A run of decimal digits; the token value is converted from str to int.
    @_(r'\d+')
    def NUMBER(self, t):
        t.value = int(t.value)
        return t

    # Called for input that matches no pattern: report the current line and
    # the first character of t.value, then terminate with exit status 1.
    def error(self, t):
        print("error at line {}: bad character - {}".format(self.lineno, t.value[0]))
        exit(1)   

class LowParser(Parser):
    # Parser for the token stream produced by LowLexer. Grammar actions
    # return tuples such as ('var', name) and ('num', value).

    # Reuse the lexer's token set so the grammar refers to the same names.
    tokens = LowLexer.tokens
    # All instruction mnemonics are declared left-associative, in four levels.
    # NOTE(review): none of the rules in this class combine these tokens as
    # operators, so this table may be unnecessary — confirm.
    precedence = (
        ('left', DAT, MOV, ADD, SUB, MUL),
        ('left', DIV, MOD, JMP, JMZ, JMN),
        ('left', DJN, SEQ, SNE, CMP, SLT),
        ('left', NOP)
        )
  
    def __init__(self):
        # Starts as an empty dict; nothing in this class reads or writes it.
        self.env = { }
  
    # Report the offending token's value and line, then exit with status 1.
    # NOTE(review): assumes t is a token object. If the parser ever passes
    # None here (e.g. at unexpected end of input), t.value would raise
    # AttributeError — confirm against sly's error() contract.
    def error(self, t):
        print("syntax error: \"{}\", at line {}".format(t.value, t.lineno))
        exit(1)

    # statement : <empty>  -- an empty statement; the action returns None.
    @_('')
    def statement(self, p):
        print("in here statement")
        pass
    
    # statement : expr  -- a statement's value is the value of its expression.
    @_('expr')
    def statement(self, p):
        print("in here expr")
        return (p.expr)
    
    # expr : MOV NUMBER NUMBER
    # NOTE(review): the symbols of this rule are MOV, NUMBER, NUMBER. There is
    # no `expr` symbol on the right-hand side, so p.expr0 / p.expr1 do not
    # exist; the two operands would be p.NUMBER0 and p.NUMBER1.
    @_('MOV NUMBER NUMBER')
    def expr(self, p):
        print("in here mov")
        return ('MOV', p.expr0, p.expr1)

    # expr : NAME  -- an identifier, returned as ('var', <name>).
    @_('NAME')
    def expr(self, p):
        return ('var', p.NAME)
  
    # expr : NUMBER  -- an integer literal, returned as ('num', <int>).
    @_('NUMBER')
    def expr(self, p):
        print("in here number")
        return ('num', p.NUMBER)

def main():
    """Lex and parse the hard-coded sample program ``MOV 1 2``.

    The parse result is bound to a local name and not used further; nothing
    is returned.
    """
    source_text = "MOV 1 2"
    token_stream = LowLexer().tokenize(source_text)
    tree = LowParser().parse(token_stream)


if __name__ == "__main__":
    main()

It seemed pretty straightforward to me. From my understanding, you are supposed to first define the tokens, which I did, and then parse them appropriately, which I think I'm not doing correctly. Does anyone know how I might go about doing this properly?


Solution

  • The immediate issue is with your tokenizer: it is reading your "MOV" as a NAME, because sly tries the token patterns in the order they are defined and NAME is defined first. Moving your definition of NAME below your instruction constants gets it past that error. Then you need to fix an issue with your "MOV NUMBER NUMBER" rule for expr:

    # Quoted from the question: this is the version that fails.
    @_('MOV NUMBER NUMBER')
    def expr(self, p):
        print("in here mov")
        # The rule's symbols are MOV, NUMBER, NUMBER; there is no `expr`
        # symbol, so p.expr0 / p.expr1 cannot be resolved.
        return ('MOV', p.expr0, p.expr1)
    

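    will not work because the rule contains no symbols named expr, so p.expr0 and p.expr1 do not exist. The two NUMBER symbols are accessed as p.NUMBER0 and p.NUMBER1, so you want this instead: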

    # Corrected rule: expr : MOV NUMBER NUMBER
    @_('MOV NUMBER NUMBER')
    def expr(self, p):
        print("in here mov")
        # The two NUMBER symbols share a name, so they are read by position:
        # p.NUMBER0 is the first operand, p.NUMBER1 the second.
        return ('MOV', p.NUMBER0, p.NUMBER1)