Search code examples
javascript parsing jison

How to get Abstract Syntax Tree (AST) out of JISON parser?


So I have generated a parser via JISON:

// mygenerator.js
var Parser = require("jison").Parser;

// Lexer rules: the whitespace rule has no return, so it yields no token;
// a run of characters from [a-f0-9] is reported as a HEX token.
var lexRules = [
    ["\\s+", "/* skip whitespace */"],
    ["[a-f0-9]+", "return 'HEX';"]
];

// Right-hand sides for hex_strings: either more hex_strings followed by HEX,
// or a single HEX.
var hexStringsRules = ["hex_strings HEX", "HEX"];

// The grammar as a plain object; jison also accepts a string written in its
// own grammar format.
var grammar = {
    "lex": { "rules": lexRules },
    "bnf": { "hex_strings": hexStringsRules }
};

var parser = new Parser(grammar);

// Source text of the generated parser, ready to be written to disk.
var parserSource = parser.generate();

// The in-memory parser can be used directly as well.

// returns true
parser.parse("adfe34bc e82a");

// throws lexical error
parser.parse("adfe34bc zxg");

My question is: how do I retrieve the AST now? I can see that I can run the parser against input, but it just returns true if the input parses and throws an error if it does not.

For the record, I am using JISON: http://zaach.github.com/jison/docs/


Solution

  • I'm not too familiar with Jison's inner workings, so I don't know any method that would do it.

    But if you don't mind a little brute force to solve this problem, try this:

    First, create an object to hold the AST

    /**
     * Node of the syntax tree assembled by the grammar actions.
     *
     * @param {string} name - label printed for this node.
     * @param {Array} x - children of the node: nested jisonAST nodes and/or
     *   plain values (printed in single quotes).
     */
    function jisonAST(name, x) { this.name = name; this.x = x; }

    /**
     * Render this node and all of its descendants as an indented string.
     *
     * @param {number} [indent=0] - nesting depth of this node; each level is
     *   indented by two spaces.
     * @returns {string} the bracketed tree, e.g. "[HEX: 'adfe34bc']".
     */
    jisonAST.prototype.get = function(indent){
      // A missing depth means "root"; without this, indent+1 would be NaN and
      // nested nodes would silently lose their indentation.
      var depth = indent || 0;

      // create an indentation for level l
      function indentString(l) {
        var pad = "";
        for (var k = 0; k < l; k++) { pad += "  "; }
        return pad;
      }

      var r = indentString(depth) + "[" + this.name + ": ";
      var rem = this.x;
      if (rem.length === 1 && !(rem[0] instanceof jisonAST)) {
        // A single plain value stays on the same line as the label.
        r += "'" + rem[0] + "'";
      } else {
        // Index loop with a locally declared counter: the previous
        // `for (i in rem)` used an undeclared `i`, which leaks a global in
        // sloppy mode and throws a ReferenceError in strict mode / ES modules.
        for (var i = 0; i < rem.length; i++) {
          if (rem[i] instanceof jisonAST) {
            r += "\n" + rem[i].get(depth + 1);
          } else {
            r += "\n" + indentString(depth + 1) + "'" + rem[i] + "'";
          }
        }
      }
      return r + "]";
    };
    

    Add a little helper function for Jison's BNF

    /**
     * Pair a production's right-hand side with an action that builds an AST node.
     *
     * The generated action assigns to `$$` a new `yy.jisonAST` whose label is
     * the right-hand side itself and whose children are `$1` .. `$n`, one per
     * whitespace-separated symbol in `s`.
     *
     * @param {string} s - right-hand side of the production.
     * @returns {Array} `[s, action]`, where `action` is the action code string.
     */
    function o( s ){
        // Escape backslashes and single quotes so that `s` remains a valid
        // single-quoted string literal inside the generated action code
        // (e.g. for a right-hand side such as: e '+' e).
        var label = s.replace(/\\/g, "\\\\").replace(/'/g, "\\'");

        // Count symbols on the trimmed text, splitting on runs of whitespace,
        // so an empty right-hand side yields no `$n` references and repeated
        // spaces do not inflate the count.
        var trimmed = s.replace(/^\s+|\s+$/g, "");
        var count = trimmed === "" ? 0 : trimmed.split(/\s+/).length;

        // Locally declared accumulator and counter; the previous version
        // assigned to undeclared `r` and `i`, creating implicit globals.
        var refs = [];
        for (var i = 1; i <= count; i++) { refs.push("$" + i); }

        var action = "$$ = new yy.jisonAST('" + label + "',[" + refs.join(",") + "]);";
        return [s, action];
    }
    

    With this, continue to the example code (slight modification):

    var Parser = require("jison").Parser;

    // Lexer rules: the whitespace rule has no return, so it yields no token;
    // a run of characters from [a-f0-9] is reported as a HEX token.
    var lexRules = [
        ["\\s+", "/* skip whitespace */"],
        ["[a-f0-9]+", "return 'HEX';"]
    ];

    // Productions. o() pairs each right-hand side with an action that builds
    // a yy.jisonAST node from the matched symbols.
    var productions = {
        // extra entry rule whose action returns the tree built for hex_strings
        "start": [ [ "hex_strings", "return $1" ] ],
        "hex_strings": [
            o("hex_strings HEX"),
            o("HEX")
        ]
    };

    // a grammar in JSON
    var grammar = {
        "lex": { "rules": lexRules },
        "bnf": productions
    };

    var parser = new Parser(grammar);
    // Make the node constructor available to the actions as yy.jisonAST.
    parser.yy.jisonAST = jisonAST;
    

    Now you can try parsing:

    // Parse three hex strings, then print the returned tree starting at depth 0.
    var ast = parser.parse("adfe34bc e82a 43af");
    console.log( ast.get(0) );
    

    This will give you:

    [hex_strings HEX: 
      [hex_strings HEX: 
        [HEX: 'adfe34bc']  
        'e82a']  
      '43af']
    

    Small note: I had to add a "start" rule so that there is exactly one statement that returns the result. It is not the cleanest solution (the BNF works fine without it), but making it the entry point ensures the finished tree is what `parse()` returns.