I have an AST generated by a parsing expression grammar from a target language that will compile to a source language by traversing its nodes. A simple source
like (10 + 20) * 2
should generate the following representation, as a native ECMAScript object:
var ast = {
"type": "Stmt",
"body": [
{
"type": "Expr",
"expression": {
"type": "BinaryExpr",
"operator": "*",
"left": {
"type": "BinaryExpr",
"operator": "+",
"left": {
"type": "Literal",
"value": 10
},
"right": {
"type": "Literal",
"value": 20
}
},
"right": {
"type": "Literal",
"value": 2
}
}
}
]
};
The generated object clearly defines the precedence of the operators, and evaluating this source is pretty easy, however, generating code back from it is a pretty complex task when you have to deal with the parenthesis solving.
When generating code by traversing nodes, the precedence is completely lost. I have a function called visitor
, which is the entry point of the program:
function visitor(node) {
switch (node.type) {
case "Stmt":
return parseStmt(node.body);
}
}
This simple grammar can accept multiple statements...
function parseStmt(body) {
var stmtList = Array(body.length);
for (var i = 0, len = body.length; i < len; i++) {
stmtList[i] = (function(stmt) {
switch (stmt.type) {
case "Expr":
return parseExpr(stmt.expression);
}
})(body[i]);
}
return stmtList.join(";\n");
}
... and two types of expressions:
function parseExpr(expr) {
switch (expr.type) {
case "BinaryExpr":
return parseBinaryExpr(expr);
case "Literal":
return parseLiteral(expr);
}
}
Where Literal
just deals with string transformation...
function parseLiteral(expr) {
return expr.value.toString();
}
... and BinaryExpr
is ambiguous when solving precedence:
function parseBinaryExpr(expr) {
var op = {
left: parseExpr(expr.left),
right: parseExpr(expr.right)
};
switch (expr.operator) {
case "+":
return Codegen.OP_ADD(op.left, op.right);
case "*":
return Codegen.OP_MUL(op.left, op.right);
}
}
Only two math operations are here supported, and the code generation happens here:
var Codegen = {
OP_ADD: function(left, right) {
return left + " + " + right;
},
OP_MUL: function(left, right) {
return left + " * " + right;
}
};
When calling visitor(ast)
back, I get 10 + 20 * 2
, which would eval to 10 + (20 * 2)
instead of (10 + 20) * 2
, and inserting parenthesis in each side of the binary expression would be a ridiculous workaround: (10 + 20) * 2
where:
function parseBinaryExpr(expr) {
var op = {
left: "(" + parseExpr(expr.left) + ")",
right: "(" + parseExpr(expr.right) + ")"
};
...
How could this ambiguity be solved in a good way?
Wouldn't a simple precedence table and looking up at the parent expression solve it?
Also, there were a little bug in the switch.
var ast = {
"type": "Stmt",
"body": [
{
"type": "Expr",
"expression": {
"type": "BinaryExpr",
"operator": "*",
"left": {
"type": "BinaryExpr",
"operator": "+",
"left": {
"type": "Literal",
"value": 10
},
"right": {
"type": "Literal",
"value": 20
}
},
"right": {
"type": "Literal",
"value": 2
}
}
}
]
};
var precedence = { "*": 0, "+": 1 };
var Codegen = {
OP_ADD: function(left, right) {
return left + " + " + right;
},
OP_MUL: function(left, right) {
return left + " * " + right;
}
};
function visitor(node) {
switch (node.type) {
case "Stmt":
return parseStmt(node.body);
}
}
function parseStmt(body) {
var stmtList = Array(body.length);
for (var i = 0, len = body.length; i < len; i++) {
stmtList[i] = (function(stmt) {
switch (stmt.type) {
case "Expr":
return parseExpr(stmt.expression, null);
}
})(body[i]);
}
return stmtList.join(";\n");
}
function parseExpr(expr, parent) {
switch (expr.type) {
case "BinaryExpr":
return parseBinaryExpr(expr, parent);
case "Literal":
return parseLiteral(expr);
}
}
function parseLiteral(expr) {
return expr.value.toString();
}
function parseBinaryExpr(expr, parent) {
var op = {
left: parseExpr(expr.left, expr),
right: parseExpr(expr.right, expr)
};
var ret = "";
switch (expr.operator) {
case "+":
ret = Codegen.OP_ADD(op.left, op.right);
break;
case "*":
ret = Codegen.OP_MUL(op.left, op.right);
break;
}
if (parent && precedence[expr.operator] > precedence[parent.operator]) {
ret = "(" + ret + ")";
}
return ret;
}
visitor(ast);
Or you can always put a parenthesis if there's a nested binary expression inside another, that would do the trick too.
if (parent) {
ret = "(" + ret + ")";
}
Just check parent because we only pass the parent if we already are inside a binary expression.