I'm using pegjs to define a grammar that allows new types to be defined. How do I then recognize those types subsequent to their definition? I have a production that defines the built-in types, e.g.
// Matches a type name: one of the three built-in keywords, or TYPE_NAME.
// TYPE_NAME is a placeholder for user-defined type names; no rule for it is
// defined in this snippet.
BuiltInType
= "int"
/ "float"
/ "string"
/ TYPE_NAME
But what do I do for the last one? I don't know what possible strings will be type names until they are defined in the source code.
In the traditional way of parsing where there is both a lexer and a parser, the parser would add the type name to a table and the lexer would use this table to determine whether to return TYPE_NAME or IDENTIFIER for a particular token. But pegjs does not have this separation.
You're right: you cannot (easily) modify the parser that pegjs generates on the fly without knowing a lot about its internals. But what you lose compared with a traditional LALR lexer/parser pair, you gain in being able to intersperse JavaScript code throughout the parser rules themselves.
To accomplish your goal, you'll need to recognize new types (in context) and keep them for use later, as in:
{
  // Symbol tables shared by the rule actions below. PEG.js executes this
  // initializer before the generated parser starts parsing.
  //
  // Both tables are created without a prototype so that names inherited from
  // Object.prototype (e.g. "constructor", "toString") are not mistaken for
  // entries when the tables are indexed by a user-supplied symbol.

  // predefined types
  const types = Object.assign(Object.create(null), { int: true, float: true, string: true })
  // variable storage
  const vars = Object.create(null)
}
// Entry point: one or more statements, optionally followed by trailing blank
// lines/whitespace (leading whitespace is already accepted by `statement`).
// Prints the collected tables and also returns them, so that the caller of
// parser.parse() receives { types, vars } instead of undefined.
start = statement+ WS* {
  const tables = { types: types, vars: vars }
  console.log(JSON.stringify(tables, null, 2))
  return tables
}
// A statement is a type definition or a variable declaration, optionally
// preceded by whitespace/blank lines and terminated by an end-of-line.
statement
  = WS* ( typedef / vardef ) EOL
// Registers a new type name in the `types` table.
// Fails the parse (via PEG.js' error(), which raises a SyntaxError carrying the
// current location) if the name is already registered.
typedef "new type definition" // eg. 'define myNewType'
  = 'define' SP+ type:symbol {
      // Own-property check: a plain truthiness lookup would also hit inherited
      // keys such as "constructor" and wrongly report a redefinition.
      if (Object.prototype.hasOwnProperty.call(types, type)) {
        error(`attempted redefinition of: "${type}"`)
      }
      types[type] = true
    }
// And then, when you need to recognize a type, something like:
// Declares a variable of a previously registered type and stores it in `vars`.
// `value` is the text of the initializer, or null when no '= number' part is
// present (the optional decl_assign did not match).
// Fails the parse (via PEG.js' error(), which raises a SyntaxError carrying the
// current location) if the type name has not been registered.
vardef "variable declaration" // eg: 'let foo:myNewType=10'
  = 'let' SP+ name:symbol COLON type:symbol SP* value:decl_assign? {
      // Own-property check: a plain truthiness lookup would accept inherited
      // keys such as "toString" as if they were known types.
      if (!Object.prototype.hasOwnProperty.call(types, type)) {
        error(`unknown type encountered: ${type}`)
      }
      vars[name] = { name, type, value }
    }
// Optional initializer of a variable declaration: '=' followed by a number.
// Yields the matched number text.
decl_assign "variable declaration assignment"
  = '=' SP* literal:number { return literal }
// Identifier: a letter followed by any number of letters or digits.
symbol = $( [a-zA-Z][a-zA-Z0-9]* )
// Optionally signed decimal number, returned as the matched text.
// The integer part is either a lone '0' or a non-zero-led digit run, so that
// "0" and "0.5" are accepted while leading zeros such as "007" are not.
number = $( ('+' / '-')? ( '0' / [1-9][0-9]* ) ( '.' [0-9]+ )? )
COLON = ':'
// Horizontal whitespace only.
SP = [ \t]
// Any whitespace, including line breaks.
WS = [ \t\r\n]
// End of a statement: LF, CRLF, or end of input, so that the last line does
// not need a trailing newline.
EOL = '\r'? '\n' / !.
which, when asked to parse the following input (each line terminated by a newline):
define fooType
let bar:fooType = 1
will print:
{
"types": {
"int": true,
"float": true,
"string": true,
"fooType": true
},
"vars": {
"bar": {
"name": "bar",
"type": "fooType",
"value": "1"
}
}
}