Search code examples
pythonwhile-loopcompiler-constructionyaccply

While cycle implementation never ends in Ply issue


Im trying to implement a small programming language with ply in python 3.8, the problem is that my implementation of a while loop never stops, and it seems that the condition in p[3] is never updated, here is the statement:

def p_statement_if(p):
    '''statement : IF LPAREN comparison RPAREN LBRAKET statement RBRAKET
                 | IF LPAREN comparison RPAREN LBRAKET statement RBRAKET ELSE LBRAKET statement RBRAKET'''
    if p[3]:
        p[0] = p[6]
    else:
        if p[10] is not None:
            p[0] = p[10]

def p_statement_while(p):
    'statement : WHILE LPAREN comparison RPAREN LBRAKET statement RBRAKET'
    while(p[3]):
        p[0] = p[6]

My question is how can I keep it always updating the condition that is in p[3]?

my entire code is this:

tokens = (
    'NAME','NUMBER',
    'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
    'LPAREN','RPAREN','LBRAKET','RBRAKET',
    'EQUAL','NOTEQ','LARGE','SMALL','LRGEQ','SMLEQ',
    'ENDSTM',
    )

reserved = {
    'while' : 'WHILE',
    'if'    : 'IF',
    'else'  : 'ELSE',
    'print' : "PRINT",
}
tokens += tuple(reserved.values())
# Tokens

t_PLUS    = r'\+'
t_MINUS   = r'-'
t_TIMES   = r'\*'
t_DIVIDE  = r'/'
t_EQUALS  = r'='

t_LPAREN  = r'\('
t_RPAREN  = r'\)'
t_LBRAKET  = r'\{'
t_RBRAKET  = r'\}'

t_EQUAL   = r'\=\='
t_NOTEQ   = r'\!\='
t_LARGE   = r'\>'
t_SMALL   = r'\<'
t_LRGEQ   = r'\>\='
t_SMLEQ   = r'\<\='

t_ENDSTM  = r';'

def t_NUMBER(t):
    r'\d+'
    t.value = int(t.value)
    return t

def t_NAME(t):
    r'[a-zA-Z_][a-zA-Z0-9_]*'
    if t.value in reserved:
        t.type = reserved[t.value]
    return t

# Ignored characters
t_ignore = " \t"

def t_newline(t):
    r'\n+'
    t.lexer.lineno += t.value.count("\n")

def t_error(t):
    print(f"Illegal character {t.value[0]!r}")
    t.lexer.skip(1)

# Build the lexer
import ply.lex as lex
lex.lex()

# Precedence rules for the arithmetic operators
precedence = (
    ('left','PLUS','MINUS'),
    ('left','TIMES','DIVIDE'),
    ('right','UMINUS'),
    )

# dictionary of names (for storing variables)
names = { }

def p_statement_statement(p):
    'statement : statement statement'

def p_statement_assign(p):
    'statement : NAME EQUALS expression ENDSTM'
    names[p[1]] = p[3]

def p_statement_expr(p):
    'statement : expression ENDSTM'
    print(p[1])

def p_expression_binop(p):
    '''expression : expression PLUS expression
                  | expression MINUS expression
                  | expression TIMES expression
                  | expression DIVIDE expression'''
    if p[2] == '+'  : p[0] = p[1] + p[3]
    elif p[2] == '-': p[0] = p[1] - p[3]
    elif p[2] == '*': p[0] = p[1] * p[3]
    elif p[2] == '/': p[0] = p[1] / p[3]

def p_comparison_binop(p):
    '''comparison : expression EQUAL expression
                  | expression NOTEQ expression
                  | expression LARGE expression
                  | expression SMALL expression
                  | expression LRGEQ expression
                  | expression SMLEQ expression'''
    if p[2] == '==':
        p[0] = p[1] == p[3]
    elif p[2] == '!=':
        p[0] = p[1] != p[3]
    elif p[2] == '>':
        p[0] = p[1] > p[3]
    elif p[2] == '<':
        p[0] = p[1] < p[3]
    elif p[2] == '>=':
        p[0] = p[1] >= p[3]
    elif p[2] == '<=':
        p[0] = p[1] <= p[3]

def p_statement_if(p):
    '''statement : IF LPAREN comparison RPAREN LBRAKET statement RBRAKET
                 | IF LPAREN comparison RPAREN LBRAKET statement RBRAKET ELSE LBRAKET statement RBRAKET'''
    if p[3]:
        p[0] = p[6]
    else:
        if p[10] is not None:
            p[0] = p[10]

def p_statement_while(p):
    'statement : WHILE LPAREN comparison RPAREN LBRAKET statement RBRAKET'
    while(p[3]):
        p[0] = p[6]

def p_statement_print(p):
    'statement : PRINT LPAREN expression RPAREN ENDSTM'
    print(p[3])

def p_expression_uminus(p):
    'expression : MINUS expression %prec UMINUS'
    p[0] = -p[2]

def p_expression_group(p):
    'expression : LPAREN expression RPAREN'
    p[0] = p[2]

def p_expression_number(p):
    'expression : NUMBER'
    p[0] = p[1]

def p_expression_name(p):
    'expression : NAME'
    try:
        p[0] = names[p[1]]
    except LookupError:
        print(f"Undefined name {p[1]!r}")
        p[0] = 0

def p_error(p):
    try:
        print(f"Syntax error at {p.value!r}")
    except:
        print("Error")

import ply.yacc as yacc
yacc.yacc()

s = open('input.txt','r').read()
yacc.parse(s)

The example input file input.txt has this:

a = 7;
b = a * 2;
print(a + b);
if(a<b){
    print(a);
}
while(a<b){
    a = a + 1;
    print(a);
}

and i got this output (never stops):

21
7
8

Solution

  • Ply does not change the rules of Python. It parses an input (once), producing whatever result you chose to implement in the parsing action functions. That's it.

    So it's evident that you cannot write an immediate interpreter as parsing rules. Execution happens after a program is parsed, and there is not a one-to-one relationship between program code and execution: some expressions (like your while condition) are evaluated many times; other expressions (like the failed branches of if-then-else statements) are not evaluated at all. Functions are called, with different arguments each time; you cannot predict what the values of those arguments are when you compile the code.

    The result of compiling a program is an "executable". That executable might be low-level machine code, or it might be some higher-level description of a program which explains what the program does when it is executed.

    If all that seemed too hand-wavy, I strongly suggest that you work your way through Abelson & Sussman's Structure and Interpretation of Computer Programs, which can be read for free on-line thanks to the generosity of its authors, and which almost 40 years after its initial publication, continues to be the best single-volume introduction to the concepts underlying computer programs. If you want to write an compiler (or an interpreter), start there.