Search code examples
javascriptregexrecursionodata

JS Parse OData $filter string into Object Structure


I am trying to convert an OData filter string into an object that represents the filter. Below is an example of a complex (non-realistic) OData filter that I'd like to convert.

substringof('v', p1) or (p2 gt 5 and p3 eq 'v v' and (p4 eq false or startswith(p5.expand, 'v'))) or (p6 eq 6 and p7 eq 7)

Using the regex and function below, I have broken this into two parts: an array of filter parts and a string with each filter part replaced by the index in the array.

var conditionMatcher = new RegExp("(substringof\\(.+?\\)|startswith\\(.+?\\)|endswith\\(.+?\\)|[\\w\\.]+?\\s(?:eq|ne|gt|ge|lt|le)\\s(?:\\w+|\\'.+?\\'))", "g");
var filters = predicateString.match(conditionMatcher);
var i = 0;
var simpleString = predicateString.replace(conditionMatcher, function () {
    return i++;
});

// simpleString =
0 or (1 and 2 and (3 or 4)) or (5 and 6)

The goal is to convert the string to something like this:

{
    op: "or",
    filters: [
        0,
        {
            op: "and",
            filters: [
                1,
                2,
                {
                    op: "or",
                    filters: [
                        3,
                        4
                    ]
                }
            ]
        },
        {
            op: "and",
            filters: [
                5,
                6
            ]
        }
    ]
}

I'm just not sure how to get here. If anyone has ideas, it would be much appreciated while I continue to work through a solution.


Solution

  • So far, my solution for this is to parse the string looking for the first closing parenthesis and its matching open parenthesis, building a filter from the group, replacing the group with the index of the filter and working outwards.

    // This array contains the filter parts extracted from the code in my question  
    var filters = [...]; 
    var filterString = "0 or (1 and 2 and (3 or 4)) or (5 and 6)";
    
    var groupString;
    var groupFilter = null;
    var testNextLevel = true;
    
    while (testNextLevel) {
        var closeParenthesisIndex = filterString.indexOf(')');
        if (closeParenthesisIndex !== -1) {
            var openParenthesisIndex = filterString.lastIndexOf('(', closeParenthesisIndex);
    
            // Extract the string between the first deepest set of parenthesis
            groupString = filterString.substring(openParenthesisIndex + 1, closeParenthesisIndex);
    
            // Modify the filter string replacing the contents of the group string in addition to the parenthesis with the length of the filters array (which will be the index of the filter object that we push)
            filterString = filterString.substring(0, openParenthesisIndex) + filters.length + filterString.substring(closeParenthesisIndex + 1);
        } else {
    
            // There are no more parenthesis groups
            groupString = filterString;
            testNextLevel = false;
        }
    
        // If the group uses both 'and' and 'or' then return null as an invalid filter string.
        if (groupString.indexOf('and') >= 0 && groupString.indexOf('or') >= 0) {
            return null;
        }
    
        // Get the group indexes out of the group string
        var groupFilterIndexes = groupString.match(/[0-9]+/g);
        var groupFilters = [];
    
        // Create an array with each of the filters who's index matches the group indexes
        for (i = 0; i < groupFilterIndexes.length; i++) {
            groupFilters.push(filters[Number(groupFilterIndexes[i])]);
        }
        var op = groupString.indexOf('or') >= 0 ? 'or' : 'and';
    
        // Create the filter object and push it onto the filters array
        groupFilter = { op: op, filters: groupFilters };
        filters.push(groupFilter);
    }
    
    return groupFilter;
    

    As each level is checked, the filter object is built and the matching indexes are pushed into the filter object. The filter object is then pushed into the filters array and the current portion of the filterString is replaced with the index of the filter object. The sequence looks like this:

    // First Level
    starting filterString: "0 or (1 and 2 and (3 or 4)) or (5 and 6)"
    starting filters.length: 7
    groupString: "3 or 4"
    filters[7] = { op: "or", filters: [filters[3], filters[4]] }
    filterString after extraction: "0 or (1 and 2 and 7) or (5 and 6)"
    filters.length: 8
    
    // Second Level
    starting filterString: "0 or (1 and 2 and 7) or (5 and 6)"
    starting filters.length: 8
    groupString: "1 and 2 and 7"
    filters[8] = { op: "and", filters: [filters[1], filters[2], filters[7]] }
    filterString after extraction: "0 or 8 or (5 and 6)"
    filters.length: 9
    
    // Third Level
    starting filterString: "0 or 8 or (5 and 6)"
    starting filters.length: 9
    groupString: "5 and 6"
    filters[9] = { op: "and", filters: [filters[5], filters[6]] }
    filterString after extraction: "0 or 8 or 9"
    filters.length: 10
    
    // Final Level
    starting filterString: "0 or 8 or 9"
    starting filters.length: 10
    groupString: "0 or 8 or 9"
    filters[10] = { op: "or", filters: [filters[0], filters[8], filters[9]] }
    filters.length: 11
    
    // Return filters[10]