Search code examples
python parsing yacc ply

PLY yacc parser : how can I handle unknown value, like imaginary numbers?


I'm using PLY (lex and yacc) to build something like the 'bc' command: it can evaluate expressions and also assign variables.

So I can resolve stuff like that : 1 + 3.12 - 4 = ? or 5 * 3 = ? or (1 + 3) * 7 /2 = ?

And also stuff like that, knowing that 'abc' = 10: (abc + 3 * abc) * 2 = ?

But I really don't know how I should handle unknown values or complex numbers, such as imaginary numbers.

How can I handle this: 2 * 2 * i + 3 - 1 which should be equal to 4i + 2

I cannot think of a parsing rule to handle the imaginary number. Any help ?

My code:

1. main code :

from global_variables import tokens
from lexer import lexer
from parser import parser


# Read-eval-print loop: every non-empty line typed at the prompt is handed to
# the parser, whose grammar actions print the result as a side effect.
while True:
    try:
        s = input('> ')
    except EOFError:
        # End of input (Ctrl-D or a closed pipe): finish the prompt line and
        # leave the loop instead of dying with a traceback.
        print()
        break
    if s:

        # Debugging aid: uncomment to dump the token stream before parsing.
        # lexer.input(s)
        # while True:
        #     tok = lexer.token()
        #     if not tok:
        #         break
        #     print(tok)

        parser.parse(s)

2. lexer :

from global_variables import tokens
import ply.lex as lex

# Regular expressions for the tokens that need no conversion logic.
# Each name is `t_` followed by an entry of the `tokens` tuple.
t_PLUS      = r'\+'
t_MINUS     = r'\-'
t_TIMES     = r'\*'
t_DIVIDE    = r'\/'
t_MODULO    = r'\%'
t_EQUALS    = r'\='
t_LPAREN    = r'\('
t_RPAREN    = r'\)'
t_POWER     = r'\^'
t_QUESTION  = r'\?'
# NAME: a run of two or more letters, or one single letter other than 'i'/'I'.
t_NAME      = r'[a-zA-Z]{2,}|[a-hj-zA-HJ-Z]'
# IMAGINE: the lowercase letter 'i'. A lone uppercase 'I' is matched by neither
# NAME nor IMAGINE.
# NOTE(review): 'i' inside a longer word only becomes a NAME if PLY tries the
# NAME pattern before this one - confirm against PLY's rule-ordering docs.
t_IMAGINE   = r'i'
# COMMAND: '!' followed by any run of ASCII characters (code points 0x00-0x7F),
# so everything after the '!' on the line ends up in the token.
t_COMMAND   = r'![\x00-\x7F]*'

def t_NUMBER(t):
    r'\d+(\.\d+)?'
    # The raw string above is this token's regular expression (PLY reads it
    # from the docstring), so it has to remain the first statement.
    #
    # Convert the matched text to an int when it has no fractional part and
    # to a float otherwise, then hand the token back to the lexer.
    try:
        t.value = int(t.value)
    except ValueError:
        # int() rejects text such as "3.12"; only that failure means "float".
        t.value = float(t.value)
    return t

# Characters ignored between tokens: space and tab.
t_ignore = " \t"

def t_error(t):
    # Report the first character of the unmatched input, then step over that
    # single character.
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    t.lexer.skip(1)

# Build the lexer object from the t_* rules defined in this module.
lexer = lex.lex()

3. parser :

from global_variables import tokens
from global_variables import variables
from global_variables import prRed
from global_variables import prGreen
from global_variables import prLightPurple
import ply.yacc as yacc


# Operator precedence for the expression grammar, weakest-binding level first.
# MODULO shares the multiplicative level, and POWER is right-associative and
# binds tighter than the multiplicative operators. Without entries for those
# two tokens, inputs such as `2 ^ 3 + 1` or `7 % 4 + 1` are not grouped by the
# usual arithmetic conventions.
# UMINUS (the fictitious token used via %prec for unary minus) stays the
# tightest level, so `-2 ^ 2` is read as `(-2) ^ 2`.
precedence = (
    ('left','PLUS','MINUS'),
    ('left','TIMES','DIVIDE','MODULO'),
    ('right','POWER'),
    ('right','UMINUS'),
    )

def p_statement_assign(t):
    'statement : NAME EQUALS expression'
    # The string above is the grammar production for this action.
    # Store the value under the lower-cased variable name, then echo it.
    name, value = t[1], t[3]
    variables[name.lower()] = value
    print(value)

def p_statement_expr(t):
    '''statement : expression
                | expression EQUALS QUESTION'''
    # The string above is the grammar production for this action.
    # Both forms ("expr" and "expr = ?") just echo the expression's value.
    result = t[1]
    print(result)

def p_expression_binop(t):
    '''expression : expression PLUS expression
                 | expression MINUS expression
                 | expression TIMES expression
                 | expression DIVIDE expression
                 | expression POWER expression
                 | expression MODULO expression'''
    # The string above is the grammar production, so it must stay first.
    #
    # Apply the binary operator t[2] to the operand values t[1] and t[3] and
    # store the outcome in t[0]:
    #   * real results are normalised to int when they are whole numbers and
    #     to float otherwise;
    #   * complex results (e.g. a negative base raised to a fractional power)
    #     are stored unchanged, because `%` is not defined for them;
    #   * a ZeroDivisionError is reported and the expression evaluates to 0,
    #     the same recovery used for undefined names, so the interactive
    #     session keeps running.
    left, op, right = t[1], t[2], t[3]
    try:
        if op == '+':
            result = left + right
        elif op == '-':
            result = left - right
        elif op == '*':
            result = left * right
        elif op == '%':
            result = left % right
        elif op == '^':
            result = left ** right
        elif op == '/':
            # True division handles int, float and complex operands alike;
            # float() would reject complex values.
            result = left / right
    except ZeroDivisionError:
        print("Division by zero")
        t[0] = 0
        return

    if isinstance(result, complex):
        t[0] = result
    elif result % 1 == 0:
        t[0] = int(result)
    else:
        t[0] = float(result)


def p_expression_uminus(t):
    'expression : MINUS expression %prec UMINUS'
    # The string above is the grammar production for this action.
    # The value of the expression is the negated operand.
    operand = t[2]
    t[0] = -operand

def p_expression_group(t):
    'expression : LPAREN expression RPAREN'
    # The string above is the grammar production for this action.
    # Parentheses only group: the inner value passes through unchanged.
    inner = t[2]
    t[0] = inner

def p_expression_number(t):
    '''expression : NUMBER
                 | IMAGINE'''
    # The string above is the grammar production for this action.
    #
    # A NUMBER value is passed through unchanged. The IMAGINE token carries
    # the text 'i'; it is mapped to Python's imaginary unit so that later
    # arithmetic is numeric (2 * i -> 2j) instead of string repetition
    # (2 * 'i' -> 'ii'). Python prints such values with a `j` suffix.
    t[0] = 1j if t[1] == 'i' else t[1]

def p_expression_name(t):
    '''expression : NAME
                 | NAME EQUALS QUESTION'''
    # The string above is the grammar production for this action.
    # NOTE(review): the "NAME = ?" alternative presumably exists so that
    # "abc = ?" still parses even though the parser has already committed to
    # "NAME =" - confirm before removing it.
    #
    # Look the variable up by its lower-cased name; an unknown name is
    # reported and evaluates to 0.
    try:
        value = variables[t[1].lower()]
    except LookupError:
        prRed("Undefined name '%s'" % t[1])
        value = 0
    t[0] = value

def p_execute_command(t):
    'statement : COMMAND'
    # The string above is the grammar production for this action.
    #
    # Run a '!' command. The command word is the text between the first '!'
    # and the next one (or the end of the token). It is stripped because the
    # COMMAND pattern also captures whitespace typed after the command, which
    # would otherwise stop e.g. "!q " from being recognised.
    #   !h  print the help text
    #   !p  print every stored variable
    #   !q  leave the program (raises SystemExit)
    # Anything else prints a hint pointing at !h.
    letter = t[1].split('!')[1].strip()
    if letter == 'h':
        prGreen("Help:")
        print("    - !p = print all variables")
        print("    - !q = quit the computor")
    elif letter == 'p':
        if variables:
            prGreen("Variables:")
            for key, value in variables.items():
                print("     {} = {}".format(key, value))
        else:
            prRed("Variables:")
            print("     There are no variables")
    elif letter == 'q':
        prGreen("Bye bye!")
        # Raise SystemExit directly: the `exit` helper is added by the `site`
        # module for interactive use and is not guaranteed to exist.
        raise SystemExit
    else:
        print("Type '!h' for help.")


def p_error(t):
    # Syntax-error hook: name the offending token's value when a token is
    # available, otherwise print the generic message.
    if not t:
        print("Syntax error!")
        return
    print("Syntax error at '%s'" % t.value)


# Build the parser object from the p_* rules and the precedence table defined
# in this module.
parser = yacc.yacc()

4. some global variables :

# Token names imported by both the lexer and the parser modules.
tokens = (
    # operands
    'NAME', 'NUMBER',
    # arithmetic operators
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
    # assignment / query punctuation, grouping and exponent
    'EQUALS', 'LPAREN', 'RPAREN', 'POWER', 'QUESTION',
    # imaginary unit and '!' commands
    'IMAGINE', 'COMMAND',
)

# Variable table imported by the parser module: maps lower-cased variable
# names to their assigned values.
variables = {}

def prRed(skk):
    """Print *skk* wrapped in ANSI colour code 91 and followed by a reset."""
    coloured = "\033[91m{}\033[00m".format(skk)
    print(coloured)


def prGreen(skk):
    """Print *skk* wrapped in ANSI colour code 92 and followed by a reset."""
    coloured = "\033[92m{}\033[00m".format(skk)
    print(coloured)


def prLightPurple(skk):
    """Print *skk* wrapped in ANSI colour code 94 and followed by a reset."""
    coloured = "\033[94m{}\033[00m".format(skk)
    print(coloured)

Solution

  • i is just a reserved name here, so you can recognise it as such in the lexer (the same way as you recognise any other keyword). If you actually want 2i to be valid, then you'll need a bit more work in the lexer, but it should be clear what's needed. (Alternatively, you could put that in the parser so that 2 i would also be valid, but I've got to say that that looks weird to me.)

    So your issue is really not a parsing issue. All you need is a complex number datatype, which Python conveniently already has. (Also see this brief explanation.)

    Ply doesn't place any limits on semantic values. If you want to use complex numbers, just use them. (But note that Python uses j instead of i for jmagjnary numbers. That doesn't mean that you have to use them, too, but if you want to use i, you'll have to do explicit number-to-string conversions.)