Search code examples
parsing typedef pegjs

How to parse type names defined during parse


I'm using pegjs to define a grammar that allows new types to be defined. How do I then recognize those types after they have been defined? I have a production that defines the built-in types, e.g.:

// Fragment from the question: the first three alternatives are the built-in
// type keywords. TYPE_NAME is meant to match a user-defined type name and is
// not defined anywhere in this fragment.
BuiltInType
  = "int"
  / "float"
  / "string"
  / TYPE_NAME

But what do I do for the last one? I don't know what possible strings will be type names until they are defined in the source code.

In the traditional way of parsing where there is both a lexer and a parser, the parser would add the type name to a table and the lexer would use this table to determine whether to return TYPE_NAME or IDENTIFIER for a particular token. But pegjs does not have this separation.


Solution

  • You're right, you cannot (easily) modify pegjs' generated parser on the fly without knowing a lot about its internals. But what you lose compared with a standard LALR parser (where a separate lexer can consult a symbol table), you gain in being able to intersperse JavaScript code throughout the parser rules themselves.

    To accomplish your goal, you'll need to recognize new types (in context) and keep them for use later, as in:

    {
      // Symbol tables shared by the rule actions of this grammar. Both are
      // created without a prototype, so a source-level name such as
      // `constructor` or `toString` is never mistaken for an existing entry
      // inherited from Object.prototype.

      // predefined types (type name -> true)
      const types = Object.assign(Object.create(null), {
        int: true,
        float: true,
        string: true,
      })

      // variable storage (variable name -> declaration record)
      const vars = Object.create(null)
    }
    
    // Entry point: one or more statements, optionally followed by trailing
    // blank space (spaces, tabs, empty lines) after the last statement.
    // When the sequence has matched, the collected symbol tables are printed
    // as pretty-printed JSON.
    start = statement+ WS* {
      console.log(JSON.stringify({ types, vars }, null, 2))
    }
    
    // One statement per line: optional leading blank space (which may span
    // empty lines), then either a type definition or a variable declaration,
    // then the end-of-line marker.
    statement
      = WS* ( typedef / vardef ) EOL
    
    // Registers a user-defined type so that later declarations can refer to it.
    // The action runs once the name has been matched and records it in `types`.
    // Defining a name that is already present (predefined or user-defined)
    // aborts the parse with an error.
    typedef "new type definition" // eg. 'define myNewType'
      = 'define' SP+ type:symbol {
        // Own-property test: a plain object inherits members such as
        // `constructor`, which must not count as already-defined types.
        if (Object.prototype.hasOwnProperty.call(types, type)) {
          // error() raises the parser's SyntaxError with the current location
          // attached, rather than throwing a bare string.
          error(`attempted redefinition of: "${type}"`)
        }
        types[type] = true
      }
    
    // And then, when you need to recognize a type, something like:
    
    // Declares a variable of an already-known type, with an optional numeric
    // initializer. The declaration is stored in `vars` under the variable's
    // name; `value` is null when the optional initializer did not match.
    // Referring to a type that has not been defined aborts the parse.
    vardef "variable declaration" // eg: 'let foo:myNewType=10'
      = 'let' SP+ name:symbol COLON type:symbol SP* value:decl_assign? {
        // Own-property test: inherited members of a plain object (for example
        // `constructor`) must not be accepted as type names.
        if (!Object.prototype.hasOwnProperty.call(types, type)) {
          // error() raises the parser's SyntaxError with the current location
          // attached, rather than throwing a bare string.
          error(`unknown type encountered: ${type}`)
        }
        vars[name] = { name, type, value }
      }
    
    // Initializer part of a variable declaration: an equals sign, optional
    // spaces, then a number. The result is whatever the `number` rule yields.
    decl_assign "variable declaration assignment"
      = '=' SP* literal:number { return literal }
    
    // Identifier: an ASCII letter followed by any number of ASCII letters or
    // digits. `$` makes the rule yield the matched text as a single string.
    symbol
      = $( [a-zA-Z] [a-zA-Z0-9]* )
    
    // Numeric literal, yielded as the matched text: optional sign, integer
    // part, optional fraction. The integer part is either a lone `0` or a
    // digit run that does not start with zero, so `0`, `0.5` and `-0.25` are
    // accepted while leading zeros such as `007` are still rejected.
    number = $( ('+' / '-')? ( '0' / [1-9][0-9]* ) ( '.' [0-9]+ )? )
    
    // Separator between a variable name and its type.
    COLON = ':'
    // Horizontal blank space inside a line.
    SP = [ \t]
    // Any blank space, line breaks included; '\r' is part of the class so
    // CRLF-terminated input is skipped like LF-terminated input.
    WS = [ \t\r\n]
    // End of a statement: LF, CRLF, or the end of the input, so the final
    // line does not need a trailing newline.
    EOL = '\r'? '\n' / !.
    

    which, when asked to parse:

    define fooType
    let bar:fooType = 1
    

    will print:

    {
      "types": {
        "int": true,
        "float": true,
        "string": true,
        "fooType": true
      },
      "vars": {
        "bar": {
          "name": "bar",
          "type": "fooType",
          "value": "1"
        }
      }
    }