I want to build a query for my system that could be used by external systems for configuration based on conditions.
On the backend I find it easy to have a JSON Clause tree which would be evaluated recursively.
[
"AND",
[
{
"operator": "eq",
"field": "section1.fieldabc",
"value": "value1"
},
[
"OR",
{
"operator": "lt",
"field": "section2.fieldxyz",
"value": 5000
},
{
"operator": "gt",
"field": "section2.fieldxyz",
"value": 1000
}
]
]
]
or something similar. (Above I have represented it something like an s-expression tree)
The thing is I want it as a JSON Clause Tree in Backend but I don't want the users to need to write anything like this. It would be great if I can create a query something like JQL (Jira query language) or something. But I don't want to spend a lot of effort actually making a full proof parser for the language that would convert.
Is there any standardised way to implement this? Maybe a standardised query language which gets converted using a library (in JS or Java).
From the end users perspective I want the above query to be like
section1.fieldabc == value1 AND (section2.fieldxyz<5000 OR section2.fieldxyz>10000)
Wrote a (relatively) simple parser in TypeScript that can parse binary operators (with correct order of operations) and constants, handle brackets, global variables and simple field accesses. The source code is available on my GitHub (which may or may not be updated in the future), while here's a snippet with a JS version:
const BINARY_OPERATORS = {
// AND/OR
'AND': 1,
'OR': 0,
// Equal stuff
'==': 2,
'!=': 2,
'<': 2,
'<=': 2,
'>': 2,
'>=': 2,
}
function parseConstant(input) {
// Numbers (including floats, octals and hexadecimals)
let match = input.match(/^\s*((?:0[xo]|\d*\.)?\d+)/);
if (match) {
const [{ length }, digits] = match;
if (digits.includes('.')) {
return [length, { type: 'constant', value: parseFloat(digits) }];
}
return [length, { type: 'constant', value: parseInt(digits) }];
}
// Strings
match = input.match(/^(\s*)(["'])/);
if (match) {
const [, white, quote] = match;
let value = '';
let escape = false;
for (let i = white.length; i < input.length; i++) {
const ch = input[i];
if (ch === '\\' && !escape) {
escape = true;
} else if (escape) {
escape = false;
value += ch;
} else if (ch === quote) {
return [i + 1, { type: 'constant', value }];
} else {
value += ch;
}
}
return [white.length];
}
// Booleans
match = input.match(/^\s*(true|false)/);
if (match) {
const [{ length }, bool] = match;
return [length, { type: 'constant', value: bool === 'true' }];
}
return [0];
}
function parseVariable(input) {
const match = input.match(/^\s*(\w+[\w\d]*)/);
if (!match) return [0];
return [match[0].length, { type: 'variable', name: match[1] }];
}
function orderBinaryOperations(expr) {
const { left, right } = expr;
const priority = BINARY_OPERATORS[expr.operator];
if (left.type == 'binop' && BINARY_OPERATORS[left.operator] < priority) {
// LOP < EXP
// (leftL LOP leftR) EXP exprR) => leftL LOP (leftR EXP exprR)
return orderBinaryOperations({
type: 'binop',
operator: left.operator,
left: left.left,
right: {
type: 'binop',
operator: expr.operator,
left: left.right,
right: expr.right,
},
});
} else if (right.type === 'binop' && BINARY_OPERATORS[right.operator] <= priority) {
// EXP >= ROP
// exprL EXP (rightL ROP rightR) => (exprL EXP rightL) ROP rightR
return orderBinaryOperations({
type: 'binop',
operator: right.operator,
left: {
type: 'binop',
operator: expr.operator,
left: expr.left,
right: right.left,
},
right: right.right,
});
}
return expr;
}
function parsePostExpression(expr, input) {
if (!expr[1]) return expr;
const trimmed = input.trimLeft();
const white = input.length - trimmed.length;
// Binary operation
for (const operator in BINARY_OPERATORS) {
if (trimmed.startsWith(operator)) {
const offset = expr[0] + white + operator.length;
const rightResult = parseExpression(trimmed.slice(operator.length));
if (!rightResult[1]) throw new Error(`Missing right-hand side expression for ${operator}`);
return parsePostExpression([
offset + rightResult[0],
orderBinaryOperations({
type: 'binop',
operator,
left: expr[1],
right: rightResult[1],
})
], trimmed.slice(rightResult[0]));
}
}
// Field access
const match = input.match(/^\.(\w+[\w\d]*)/);
if (match) {
const [{ length }, field] = match;
return parsePostExpression([
expr[0] + white + length,
{ type: 'field', object: expr[1], field }
], trimmed.slice(length));
}
return expr;
}
function parseExpression(input) {
// Constants
let result = parseConstant(input);
// Variables
if (!result[1]) result = parseVariable(input);
// Brackets
if (!result[1]) {
const match = input.match(/^\s*\(/);
if (match) {
const [{ length }] = match;
const brackets = parseExpression(input.slice(length));
if (brackets[1]) {
const offset = brackets[0] + length;
const endBracket = input.slice(offset).match(/^\s*\)/);
if (!endBracket) throw new Error(`Missing ')' in '${input}'`);
result = [offset + endBracket[0].length, {
type: 'brackets', expr: brackets[1]
}];
}
}
}
return parsePostExpression(result, input.slice(result[0]));
}
function parse(input) {
const [length, expr] = parseExpression(input);
if (length === input.length) {
if (expr) return expr;
throw new Error(`Unfinished expression`);
}
if (!expr) throw new Error(`Unexpected character at ${length}`);
throw new Error(`Unexpected character at ${length}`);
}
const parsed = parse('(section2.fieldxyz<5000 OR section2.fieldxyz>10000) AND section1.fieldabc == value1');
console.log(JSON.stringify(parsed, null, 4));
function formatExpression(expr) {
if (expr.type === 'binop') {
// Wrapping in [] so the order of operations is clearly visible
return `[${formatExpression(expr.left)} ${expr.operator} ${formatExpression(expr.right)}]`;
} else if (expr.type === 'brackets') {
return `(${formatExpression(expr.expr)})`;
} else if (expr.type === 'constant') {
return JSON.stringify(expr.value);
} else if (expr.type === 'field') {
return `${formatExpression(expr.object)}.${expr.field}`;
} else if (expr.type === 'variable') {
return expr.name;
}
throw new Error(`Unexpected expression type '${expr.type}'`);
}
console.log('=>', formatExpression(parsed));
Example output when converted to JSON:
{
"type": "binop",
"operator": "AND",
"left": {
"type": "brackets",
"expr": {
"type": "binop",
"operator": "OR",
"left": {
"type": "binop",
"operator": "<",
"left": {
"type": "field",
"object": {
"type": "variable",
"name": "section2"
},
"field": "fieldxyz"
},
"right": {
"type": "constant",
"value": 5000
}
},
"right": {
"type": "binop",
"operator": ">",
"left": {
"type": "field",
"object": {
"type": "variable",
"name": "section2"
},
"field": "fieldxyz"
},
"right": {
"type": "constant",
"value": 10000
}
}
}
},
"right": {
"type": "binop",
"operator": "==",
"left": {
"type": "field",
"object": {
"type": "variable",
"name": "section1"
},
"field": "fieldabc"
},
"right": {
"type": "variable",
"name": "value1"
}
}
}
I've went with always using objects with a type
field, although you can still convert the binop
objects to look like e.g. ['AND', expr1, expr2]
. And instead of simply having binary operations always be on a field that's just a a.b.c.etc
string, mine is a bit more advanced. Could still add restrictions, though, at least the groundworks are there.
I've tackled this issue because I enjoy writing these kind of things. I actually recommend going with Chandan's suggestion of using jQuery QueryBuilder or react-query-builder, to make it a lot easier and friendlier towards your users.
If you're more targeted towards "power users" which would enjoy an SQL-like syntax, my code could help. There are probably many better libraries out there to help with that, though, which might be more robust in e.g. reporting syntax errors or trying to access non-existant variables/fields. Then again, since my code is only about 150 lines (200 if you include types) and not written too weirdly, it shouldn't be too hard to adapt it for your needs if that suits you better.