I'm writing a syntax highlighter for Ace Editor, and I'm having trouble correctly lexing function calls in this language. Function calls have two basic forms:
With parentheses:
function(foo, "bar")
With colons:
function: foo, "bar"
I can detect both forms, but once I go into the state of a colon-style function call, I have trouble getting back out of that state (which messes up the following lines). In particular, this problem exists when the function call ends with a string.
Below is a smaller version of the highlighter that focuses only on this problem. The structure might seem overly complex, but bear in mind that it is part of a larger lexer, which I think warrants the complexity.
You can try it out in the mode creator with the following snippet, in which the third line does not get properly highlighted.
function(a, "bar")
function: a, "bar"
function("bar", a)
function: "bar", a
function("bar")
And here's the syntax definition:
// Reduced Ace highlight-rules module that reproduces the problem with
// leaving a colon-style function call ("function: a, "bar"").
define(function(require, exports, module) {
"use strict";
var oop = require("../lib/oop");
var TextHighlightRules = require("./text_highlight_rules").TextHighlightRules;
// Highlight rules: defines the lexer states in this.$rules and then calls
// normalizeRules() on them.
var MyHighlightRules = function() {
// Names recognised as function calls; joined into an alternation below.
var functions = [ "function" ];
this.$rules = {
"start" : [
{
// A known function name, matched only when followed by optional
// whitespace and ':' or '(' (lookahead, so the delimiter is not consumed).
token : 'keyword',
regex : '\\b(?:' + functions.join('|') + ')(?=\\s*[:(])',
push : [
{ include : 'function' },
]
}
],
// A function call
'function' : [
{
// The ':' or '(' that opens the argument list; pushes a state made of
// the 'comma_list' rules.
token : 'text',
regex : /(?:[:(])/,
push : [
{ include : 'comma_list' },
]
}, {
// Intended exit from the call: a literal ')' or, via lookahead, the end
// of the line.
token : 'keyword',
regex : /(?:\)|(?=$))/,
next : 'pop'
}
],
// A series of arguments, separated by commas
'comma_list' : [
{
token : 'text',
regex : /\s+/,
}, {
// Opening quote of a string argument.
// NOTE(review): this enters 'string' with `next`, while 'string' leaves
// with `next : 'pop'` -- confirm the state stack stays balanced.
token : 'string',
regex : /"/,
next : 'string',
}, {
// NOTE(review): no state keyed "variable_name" is defined in these rules;
// the state below is keyed 'variable-name' (hyphen, not underscore).
include : "variable_name"
}
],
// An identifier: a lowercase letter followed by letters, digits, '_' or '.'.
'variable-name' : [
{
token : 'keyword',
regex : /[a-z][a-zA-Z0-9_.]*/,
// This makes no difference
next : 'pop'
},
],
// Inside a double-quoted string: everything is 'string.quoted' until the
// closing quote, which pops the state.
'string': [
{
token : 'string.quoted',
regex : /"/,
next : 'pop'
},
{ defaultToken : 'string.quoted' }
],
};
this.normalizeRules();
};
oop.inherits(MyHighlightRules, TextHighlightRules);
exports.MyHighlightRules = MyHighlightRules;
});
Specifically, the /(?:\)|(?=$))/ rule in the function state seems to match only if the previous state was not a string. How can I get it to match regardless, so that my lexer exits the function call even with colon-style function calls?
To confound things even more, if I change the regex to /(?:|(?=$))/
it highlights all the lines correctly, even though I can't understand why. What's going on here?
The main problem is that at the end of a line Ace allows only one state transition: https://github.com/ajaxorg/ace/blob/master/lib/ace/tokenizer.js#L317. So after matching the closing " at the end of the line and switching to the function state, it won't run the regexp again, so $ won't match anything. You can probably report this issue on GitHub.
The second issue is a typo in your code: the include refers to "variable_name", but the state is defined as 'variable-name'.
Here's a modified version of your highlighter, which uses ^
in addition to $
to get highlighting similar to what you wanted.
define(function(require, exports, module) {
"use strict";
var oop = require("../lib/oop");
var TextHighlightRules = require("./text_highlight_rules").TextHighlightRules;
var MyHighlightRules = function() {
var functions = [ "function" ];
this.$rules = {
"start" : [
{
token : 'keyword',
regex : '\\b(?:' + functions.join('|') + ')(?=\\s*[:(])',
push : [
{ include : 'function' },
]
}
],
// A function call
'function' : [
{
token : 'paren',
regex : /(?:[:(])/,
},
{
token : 'paren',
regex : /(?:\)|$|^)/,
next : 'pop'
},
{ include : 'commaList' },
],
// A series of arguments, separated by commas
'commaList' : [
{
token : 'text',
regex : /\s+/,
}, {
token : 'string.start',
regex : /"/,
push : 'string',
}, {
include : "variableName"
}
],
'variableName' : [
{
token : 'variable.parameter',
regex : /[a-z][a-zA-Z0-9_.]*/
},
],
'string': [
{
token : 'string.end',
regex : /"/,
next : 'pop'
},
{ defaultToken : 'string.quoted' }
],
};
this.normalizeRules();
};
oop.inherits(MyHighlightRules, TextHighlightRules);
exports.MyHighlightRules = MyHighlightRules;
});