Search code examples
javascript, algorithm, mathml

MathML generation algorithm for JavaScript: any recommended references?


I am trying to write code that generates MathML from traditional math input, using JavaScript. Are there any references or recommended readings I can go through to get a grasp of the required algorithm? I have been reading the W3C MathML specification, but it is a reference for the standard, not for the algorithm.

For instance, given the sample input

sqrt 9 * 5 + 20

I want to generate the MathML expression like below

<math xmlns='http://www.w3.org/1998/Math/MathML'> <mrow> <mrow> <mn>5</mn> <mo>&#8290;</mo> <mn>9</mn> <mo>&#8290;</mo> <mi>SQRT</mi> </mrow> <mo>+</mo> <mn>20</mn> </mrow> </math>

Solution

  • I found a nice tutorial on MathML here: http://rypress.com/tutorials/mathml/basic-algebra.html and started to work out a very basic algebraic parser (for example, 4*sqrt(x+6)=(5-z)*y/7) with a crude stack for handling parentheses and an example sqrt function. Is this the direction you are after?

    jsfiddle here: http://jsfiddle.net/alhambra1/bSJyE/

    JavaScript code:

    <script>
    // Emit the demo UI while the page is still being parsed: a text input,
    // a button wired to convertToMathML(), and the container for the result.
    // The fragments are written one by one, in this order.
    [
        '<p><input id="input" size=50>',
        '<button onclick="convertToMathML()">Convert</button></p>',
        '<div id="output"></div>'
    ].forEach(function(fragment){
        document.write(fragment);
    });
    
    /**
     * Reads a single lexeme from `str`, starting at index `ptr`.
     *
     * Recognised input:
     *   - ")"                    -> type "closeBracket"
     *   - "("                    -> type "func" with func "mfenced"
     *   - digits and "."         -> type "mn"; the whole run is consumed
     *   - a known function name  -> type "func" with the MathML element in `func`
     *                               (case-insensitive; it must be followed by "(",
     *                               optionally after whitespace, and that "(" is
     *                               consumed as part of the lexeme)
     *   - any other ASCII letter -> type "mi", one letter per lexeme
     *   - + - * / =              -> type "mo"; `string` is the text to emit for it
     * Anything else (including whitespace between tokens and reading at or past
     * the end of `str`) produces a lexeme of type "error".
     *
     * @param {string} str  The full input expression.
     * @param {number} ptr  Index of the first character of the lexeme.
     * @returns {{string: string, type: string, pointer: number, func: (string|undefined)}}
     *   `pointer` is the index just past the lexeme; for type "error" it is the
     *   index of the offending character and `string` is that character.
     */
    function lex(str,ptr){
        var operators = {"+": "+"
                        , "-": "-"
                        , "*": "&times;"
                        , "/": "&divide;"
                        , "=": "="},
            functions = {sqrt: "msqrt"},
            lexeme = {string: "", type: ""},
            code = str.charCodeAt(ptr),
            ch = str.charAt(ptr),
            names,
            name,
            k;

        // "." (46) through "9" (57), excluding "/" (47).
        function isNumberCode(c){
            return c > 45 && c < 58 && c !== 47;
        }

        // ASCII A-Z or a-z.
        function isLetterCode(c){
            return (c > 64 && c < 91) || (c > 96 && c < 123);
        }

        //identify type
        if (code === 41){
            lexeme.type = "closeBracket";
        } else if (code === 40){
            lexeme.type = "func";
            lexeme.func = "mfenced";
        } else if (isNumberCode(code)){
            lexeme.type = "mn";
        } else if (isLetterCode(code)){
            // A letter is an identifier unless it starts a known function name.
            // Stop at the first match so that later table entries can neither
            // reset the type nor advance the pointer a second time.
            lexeme.type = "mi";
            names = Object.keys(functions);
            for (k = 0; k < names.length; k++){
                name = names[k];
                if (str.slice(ptr, ptr + name.length).toLowerCase() === name){
                    lexeme.type = "func";
                    lexeme.func = functions[name];
                    // Leave ptr on the last character of the name; the shared
                    // single-character step below moves past it.
                    ptr += name.length - 1;
                    break;
                }
            }
        } else if (!Object.prototype.hasOwnProperty.call(operators, ch)){
            return {string: ch, type: "error", pointer: ptr};
        } else {
            lexeme.type = "mo";
        }

        // Consume one character: operators are translated, everything else is
        // copied verbatim.
        lexeme.string = lexeme.type === "mo" ? operators[str.charAt(ptr)]
                                             : str.charAt(ptr);
        ptr++;

        //identify numbers and functions
        if (lexeme.type === "mn"){
            while (ptr < str.length && isNumberCode(str.charCodeAt(ptr))){
                lexeme.string += str.charAt(ptr);
                ptr++;
            }
        } else if (lexeme.type === "func" && lexeme.func !== "mfenced"){
            // A function name must be followed by "(", optionally after whitespace.
            while (ptr < str.length && /\s/.test(str.charAt(ptr))){
                ptr++;
            }
            if (str.charAt(ptr) !== "("){
                return {string: str.charAt(ptr), type: "error", pointer: ptr};
            }
            ptr++;
        }

        lexeme.pointer = ptr;

        return lexeme;
    }
    
    /**
     * Renders a lexeme as an element named after its type, with the lexeme's
     * text on its own line between the opening and closing tags.
     *
     * @param {{type: string, string: string}} lexeme
     * @returns {string} The markup, ending with a newline.
     */
    function markup(lexeme){
        var openTag = "<" + lexeme.type + ">\n",
            closeTag = "</" + lexeme.type + ">\n";

        return openTag + lexeme.string + "\n" + closeTag;
    }
    
    /**
     * Converts the expression typed into the #input element to MathML and
     * shows it in the #output element.
     *
     * The input is tokenised with lex(). Every "func" lexeme (a function name or
     * an opening parenthesis) opens its element plus an <mrow> and is pushed on a
     * stack; every closing bracket pops the stack and closes both, innermost
     * first. Other lexemes are rendered with markup().
     *
     * On success #output receives the expression wrapped in a <math> element.
     * On failure (unparsable character, unmatched closing bracket, or unclosed
     * brackets at the end of input) parsing stops at the first problem and
     * #output receives only the error message, as plain text.
     */
    function convertToMathML(){
        var str = document.getElementById('input').value,
            output = document.getElementById('output'),
            expression = "",
            error = "",
            ptr = 0,
            stack = [],
            currLexeme;

        while (ptr < str.length && !error){
            currLexeme = lex(str,ptr);

            if (currLexeme.type === "error"){
                error = "Cannot parse \"" + currLexeme.string
                      + "\" at " + currLexeme.pointer;
            } else if (currLexeme.type === "closeBracket"){
                if (stack.length === 0){
                    // pointer is just past the bracket, hence the - 1.
                    error = "Extra bracket at: " + (currLexeme.pointer - 1);
                } else {
                    // Close in reverse order of opening: the <mrow> was opened
                    // inside the function element, so it has to be closed first.
                    expression += "</mrow>\n"
                                + "</" + stack.pop().func + ">\n";
                }
            } else if (currLexeme.type === "func"){
                expression += "<" + currLexeme.func + ">\n"
                            + "<mrow>\n";
                stack.push(currLexeme);
            } else {
                expression += markup(currLexeme);
            }

            ptr = currLexeme.pointer;
        }

        if (!error && stack.length > 0){
            error = "Missing " +  stack.length + " closing bracket/s.";
        }

        if (error){
            // The message is not MathML and echoes a character from user input,
            // so write it as text instead of parsing it as markup.
            output.textContent = error;
            return;
        }

        output.innerHTML = "<math xmlns='http://www.w3.org/1998/Math/MathML'>"
                         + expression + "</math>";
    }
    </script>