I am trying to write code that generates MathML from traditional math input, and I am using JavaScript. Are there any references or recommended readings I can go through to get a grasp of the required algorithm? I am reading the W3C MathML specification, but it is a reference for the markup standard, not for the parsing algorithm.
For instance, for a sample input of
sqrt 9 * 5 + 20
I want to generate the MathML expression like below
<math xmlns='http://www.w3.org/1998/Math/MathML'> <mrow> <mrow> <mn>5</mn> <mo>&#x2062;</mo> <mn>9</mn> <mo>&#x2062;</mo> <mi>SQRT</mi> </mrow> <mo>+</mo> <mn>20</mn> </mrow> </math>
I found a nice tutorial on MathML here: http://rypress.com/tutorials/mathml/basic-algebra.html and started to work out a very basic algebraic parser (for example, 4*sqrt(x+6)=(5-z)*y/7) with a crude stack for handling parentheses and an example sqrt function. Is this the direction you are after?
jsfiddle here: http://jsfiddle.net/alhambra1/bSJyE/
JavaScript code:
<script>
// Build the demo UI while the page is parsing: a text input, a button that
// triggers convertToMathML(), and the container that receives the result.
var demoMarkup = [
  '<p><input id="input" size=50>',
  '<button onclick="convertToMathML()">Convert</button></p>',
  '<div id="output"></div>'
];
// document.write concatenates its arguments, so this emits the same stream
// as three consecutive single-argument calls.
document.write(demoMarkup[0], demoMarkup[1], demoMarkup[2]);
/**
 * Reads a single lexeme from `str`, starting at index `ptr`.
 *
 * Recognised lexemes:
 *   - ")"                      -> type "closeBracket"
 *   - "("                      -> type "func", func "mfenced"
 *   - a run of digits and "."  -> type "mn"
 *   - a known function name (matched case-insensitively) followed by
 *     optional whitespace and "(" -> type "func", func = MathML tag name
 *   - any other single ASCII letter -> type "mi"
 *   - one of + - * / =         -> type "mo" (string is the display symbol)
 * Every other character, including whitespace between lexemes, produces an
 * "error" lexeme. A function name that is not followed by "(" is an error too.
 *
 * @param {string} str - The whole input expression.
 * @param {number} ptr - Index of the first character of the lexeme to read.
 * @returns {{string: string, type: string, pointer: number, func: (string|undefined)}}
 *   On success `pointer` is the index just past the lexeme (for a named
 *   function, just past its opening "("). For type "error", `string` is the
 *   offending character ("" at end of input) and `pointer` is its index.
 */
function lex(str, ptr) {
  var operators = {"+": "+", "-": "-", "*": "×", "/": "÷", "=": "="};
  var functions = {sqrt: "msqrt"};
  var hasOwn = Object.prototype.hasOwnProperty;
  var lexeme = {string: "", type: ""};
  var ch = str.charAt(ptr);
  var matched = null;
  var name;

  // Identify the lexeme type from its first character.
  if (ch === ")") {
    lexeme.type = "closeBracket";
  } else if (ch === "(") {
    lexeme.type = "func";
    lexeme.func = "mfenced";
  } else if (/[0-9.]/.test(ch)) {
    lexeme.type = "mn";
  } else if (/[A-Za-z]/.test(ch)) {
    // `name` is a declared local (the original leaked a global loop variable,
    // which throws in strict/module code), and the search stops at the first
    // match so a later non-matching entry cannot reset the type to "mi".
    for (name in functions) {
      if (hasOwn.call(functions, name) &&
          str.slice(ptr, ptr + name.length).toLowerCase() === name) {
        matched = name;
        break;
      }
    }
    if (matched === null) {
      lexeme.type = "mi";
    } else {
      lexeme.type = "func";
      lexeme.func = functions[matched];
    }
  } else if (!hasOwn.call(operators, ch)) {
    return {string: ch, type: "error", pointer: ptr};
  } else {
    lexeme.type = "mo";
  }

  // Consume the lexeme text and advance the pointer past it.
  if (lexeme.type === "mo") {
    lexeme.string = operators[ch];
    ptr += 1;
  } else if (lexeme.type === "mn") {
    while (ptr < str.length && /[0-9.]/.test(str.charAt(ptr))) {
      lexeme.string += str.charAt(ptr);
      ptr += 1;
    }
  } else if (matched !== null) {
    // Keep the function name as typed rather than only its last character.
    lexeme.string = str.slice(ptr, ptr + matched.length);
    ptr += matched.length;
    // Whitespace is tolerated between the function name and its "(".
    while (ptr < str.length && /\s/.test(str.charAt(ptr))) {
      ptr += 1;
    }
    if (str.charAt(ptr) !== "(") {
      return {string: str.charAt(ptr), type: "error", pointer: ptr};
    }
    ptr += 1;
  } else {
    // "(", ")" and single-letter identifiers are one character long.
    lexeme.string = ch;
    ptr += 1;
  }

  lexeme.pointer = ptr;
  return lexeme;
}
/**
 * Wraps a lexeme's text in a MathML element named after the lexeme type.
 *
 * @param {{type: string, string: string}} lexeme - Lexeme to render.
 * @returns {string} Opening tag, text and closing tag, each followed by "\n".
 */
function markup(lexeme) {
  var openTag = "<" + lexeme.type + ">\n";
  var content = lexeme.string + "\n";
  var closeTag = "</" + lexeme.type + ">\n";
  return openTag + content + closeTag;
}
/**
 * Converts the expression typed into the element with id "input" to MathML
 * and shows the result in the element with id "output".
 *
 * The input is tokenised with lex(); identifiers, numbers and operators are
 * rendered with markup(). Each "func" lexeme (an opening bracket or a named
 * function) opens "<func><mrow>" and is pushed on a stack; each closing
 * bracket pops the stack and closes those two elements in reverse order.
 *
 * On any problem (unparseable character, unmatched ")" or unclosed "(")
 * conversion stops at the first error and the message is shown as plain text
 * instead of MathML.
 */
function convertToMathML() {
  var str = document.getElementById('input').value;
  var output = document.getElementById('output');
  var expression = "";
  var error = "";
  var ptr = 0;
  var stack = [];
  var currLexeme;

  while (ptr < str.length && error === "") {
    currLexeme = lex(str, ptr);
    if (currLexeme.type == "error") {
      error = "Cannot parse \"" + currLexeme.string
        + "\" at " + currLexeme.pointer;
    } else if (currLexeme.type == "closeBracket") {
      if (stack.length == 0) {
        // lex() leaves the pointer one past the bracket it just consumed.
        // Recording the error ends the loop, so nothing is appended to it.
        error = "Extra bracket at: " + (currLexeme.pointer - 1);
      } else {
        // Close in reverse order of opening: "<func><mrow>" was emitted, so
        // the inner <mrow> must be closed before the enclosing function tag.
        expression += "</mrow>\n"
          + "</" + stack.pop().func + ">\n";
      }
    } else if (currLexeme.type == "func") {
      expression += "<" + currLexeme.func + ">\n"
        + "<mrow>\n";
      stack.push(currLexeme);
    } else {
      expression += markup(currLexeme);
    }
    if (error === "") {
      ptr = currLexeme.pointer;
    }
  }

  if (error === "" && stack.length > 0) {
    error = "Missing " + stack.length + " closing bracket/s.";
  }

  if (error !== "") {
    // Error text contains raw user input (e.g. "<" or "&"); textContent
    // displays it literally instead of parsing it as markup.
    output.textContent = error;
  } else {
    output.innerHTML = "<math xmlns='http://www.w3.org/1998/Math/MathML'>"
      + expression + "</math>";
  }
}
</script>