Search code examples
syntax-highlighting ace-editor lexical-analysis

Exit state at end of line in ace editor syntax highlighter


I'm writing a syntax highlighter for Ace Editor, and I'm having trouble correctly lexing function calls in this language. Function calls have two basic forms:

With parentheses:

function(foo, "bar")

With colons:

function: foo, "bar"

I can detect both forms, but once I go into the state of a colon-style function call, I have trouble getting back out of that state (which messes up the following lines). In particular, this problem exists when the function call ends with a string.

Below I've made a smaller version of the highlighter, that only focuses on this problem. The structure might seem overly complex, but bear in mind that this is part of a larger lexer, which I think warrants the complexity.

You can try it out in the mode creator with the following snippet, in which the third line does not get properly highlighted.

function(a, "bar")
function: a, "bar"
function("bar", a)
function: "bar", a
function("bar")

And here's the syntax definition:

define(function(require, exports, module) {
"use strict";

var oop = require("../lib/oop");
var TextHighlightRules = require("./text_highlight_rules").TextHighlightRules;

// Highlight rules for a small language whose function calls come in two
// forms: `name(arg, ...)` and `name: arg, ...`.
//
// Takes no arguments. Fills `this.$rules` with the tokenizer states defined
// below and then calls `this.normalizeRules()`.
var MyHighlightRules = function() {

    // Names that are highlighted as the head of a function call.
    var functions = [ "function" ];

    this.$rules = {
        "start" : [
            {
                token : 'keyword',
                // One of the listed names at a word boundary, matched only when
                // optional whitespace and then `:` or `(` follow. The lookahead
                // does not consume the `:` / `(`.
                regex : '\\b(?:' + functions.join('|') + ')(?=\\s*[:(])',
                // Inline state built from the 'function' rules.
                // NOTE(review): presumably `push` remembers the current state so a
                // later `next : 'pop'` can return to it - confirm against the Ace docs.
                push  : [
                    { include : 'function' },
                ]
            }
        ],
        // A function call
        'function' : [
            {
                // The opening `:` or `(`; the argument list is handled by an
                // inline state built from the 'comma_list' rules.
                token : 'text',
                regex : /(?:[:(])/,
                push  : [
                    { include : 'comma_list' },
                ]
            }, {
                // Intended exit from the call: a literal `)` or, via a zero-width
                // lookahead, the end of the line.
                token : 'keyword',
                regex : /(?:\)|(?=$))/,
                next  : 'pop'
            }
        ],
        // A series of arguments, separated by commas
        'comma_list' : [
            {
                // Runs of whitespace between arguments.
                token : 'text',
                regex : /\s+/,
            }, {
                // Opening double quote. The switch to 'string' uses `next`,
                // not `push`.
                token : 'string',
                regex : /"/,
                next : 'string',
            }, {
                // NOTE(review): this asks for "variable_name" (underscore), but the
                // only matching state in this table is keyed 'variable-name' (hyphen).
                include : "variable_name"
            }
        ],
        // An identifier: a lowercase letter followed by letters, digits, `_` or `.`.
        'variable-name' : [
            {
                token : 'keyword',
                regex : /[a-z][a-zA-Z0-9_.]*/,
                // This makes no difference
                next : 'pop'
            },
        ],
        // Inside a double-quoted string.
        'string': [
            {
                // Closing double quote leaves the string state.
                token : 'string.quoted',
                regex : /"/,
                next  : 'pop'
            },
            // Anything else inside the string gets the string token.
            { defaultToken : 'string.quoted' }
        ],
    };

    this.normalizeRules();
};

oop.inherits(MyHighlightRules, TextHighlightRules);

exports.MyHighlightRules = MyHighlightRules;
});

Specifically: the /(?:\)|(?=$))/ rule in the function state seems to match only if the previous state was not a string. How can I get it to match regardless, so that my lexer exits the function call even for colon-style function calls?

To confound things even more, if I change the regex to /(?:|(?=$))/ it highlights all the lines correctly, even though I can't understand why. What's going on here?


Solution

  • The main problem is that at the end of a line Ace allows only one state transition (see https://github.com/ajaxorg/ace/blob/master/lib/ace/tokenizer.js#L317). So after matching the closing " at the end of the line and switching to the function state, the tokenizer does not run the regexp again, so $ never gets a chance to match and the function state is not popped. You could report this as an issue on GitHub.

    The second issue is a typo in your code: the comma_list state includes variable_name, but the state is defined as variable-name.

    Here's a modified version of your highlighter, which uses ^ in addition to $ to get highlighting similar to what you wanted.

    define(function(require, exports, module) {
    "use strict";
    
    var oop = require("../lib/oop");
    var TextHighlightRules = require("./text_highlight_rules").TextHighlightRules;
    
    // Highlight rules for function calls written either as `name(arg, ...)`
    // or as `name: arg, ...`.
    //
    // Takes no arguments. Assigns the state table to `this.$rules` and then
    // calls `this.normalizeRules()`.
    var MyHighlightRules = function() {
        // Names that are highlighted as the head of a function call.
        var functions = [ "function" ];

        // A listed name at a word boundary, matched only when optional
        // whitespace and then `:` or `(` follow (lookahead, not consumed).
        var callHeadPattern = '\\b(?:' + functions.join('|') + ')(?=\\s*[:(])';

        // Entry state: a call head opens an inline state built from the
        // 'function' rules.
        var startRules = [
            {
                token : 'keyword',
                regex : callHeadPattern,
                push  : [ { include : 'function' } ]
            }
        ];

        // Inside a call: the opening `:` / `(`, then the exit rule, then the
        // argument rules. The exit rule matches `)`, end of line, or start of
        // line; `^` is there so that a call still open when the previous line
        // ended can be closed at the start of the next line.
        var functionRules = [
            { token : 'paren', regex : /(?:[:(])/ },
            { token : 'paren', regex : /(?:\)|$|^)/, next : 'pop' },
            { include : 'commaList' }
        ];

        // Arguments: whitespace, double-quoted strings and variable names.
        var commaListRules = [
            { token : 'text', regex : /\s+/ },
            { token : 'string.start', regex : /"/, push : 'string' },
            { include : "variableName" }
        ];

        // An identifier: a lowercase letter followed by letters, digits,
        // `_` or `.`.
        var variableNameRules = [
            { token : 'variable.parameter', regex : /[a-z][a-zA-Z0-9_.]*/ }
        ];

        // Inside a string: the closing quote pops; everything else is string text.
        var stringRules = [
            { token : 'string.end', regex : /"/, next : 'pop' },
            { defaultToken : 'string.quoted' }
        ];

        this.$rules = {
            "start"        : startRules,
            'function'     : functionRules,
            'commaList'    : commaListRules,
            'variableName' : variableNameRules,
            'string'       : stringRules
        };

        this.normalizeRules();
    };
    
    oop.inherits(MyHighlightRules, TextHighlightRules);
    
    exports.MyHighlightRules = MyHighlightRules;
    });