Search code examples
c#treeexpressionset-operations

C# Evaluate Tree expressions where leaf nodes are sets of integers


I need to evaluate in C# ad-hoc queries a where value nodes of a tree expressions are sets of numbers.

A sample expression Set-A NOT SET-B AND (SET-C OR SET-D)

AND = INTERSECT
OR = UNION
NOT = EXCEPT

The expressions can get quite complex - my data sets are drawn from the respondents who answer surveys with many questions. I want to end up with the set of respondents who answered specific questions in specific ways.

I've tried building a tree style evaluator but while it works in most cases it fails in others (mostly if I put a NOT in different places).

Is there anyone who has done this before and come up with an elegant solution they would like to share? Preferably in C# - obviously I use LINQ to do the set operations, I need a way to build and evaluate trees that combine multiple sets in different ways.

Here is my current code (added in response to comment)

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Z.Expressions;
using DIPEF = DataImport.EF;

namespace DataImport.Models
{
    public enum OPMODEL { NULL = 0, AND = 1, OR = 2, NOT = 3, ORNOT = 4 }

    public class DemographicOption
    {
        public long OptionId { get; set; }
        public long QuestionId { get; set; }
        public string QuestionText { get; set; }
        public string OptionText { get; set; }
        public OPMODEL Operation { get; set; }
        public bool LParen { get; set; }
        public bool RParen { get; set; }
        public string Logic { get; set; }
 
        public DemographicOption()
        {
            Operation = OPMODEL.NULL;
            QuestionId = 0;
        }
    }

    public class DemographicTree
    {
        public int projectId { get; set; }
        public OPMODEL Operation;  //root node
        public long OptionId;
        public long QuestionId;
        public string Logic;
        public string OptionText;
        public DemographicTree LChild; // Complex Node
        public DemographicTree RChild; // Complex Node
        public Stack<OPMODEL> opStack = new Stack<OPMODEL>();
        public bool Not { get; set; }
        public DemographicTree()
        {
            Operation = OPMODEL.NULL;
            LChild = null;
            RChild = null;
            Not = false;
        }

        private DemographicTree LChildTree(ref List<DemographicOption> list)
        {
            if (list == null || list.Count == 0)
                return null;
            DemographicOption o = list[0];
            DemographicTree T = new DemographicTree();
            T.Operation = o.Operation;
            T.LChild = new DemographicTree();
            T.LChild.QuestionId = o.QuestionId;
            T.LChild.Logic = o.Logic;
            T.LChild.OptionId = o.OptionId;
            T.LChild.OptionText = o.OptionText;
            list.RemoveAt(0);
            if (list.Count > 0)
            {
                opStack.Push(list[0].Operation);
                T.RChild = new DemographicTree(list);
            }
            return T;
        }

        public DemographicTree(List<DemographicOption> list)
        {
            if (list == null || list.Count == 0)
                return;
            DemographicOption o = list[0];
            DemographicOption ultima = list.Last();
            if (ultima.RParen == true && ultima.Operation == OPMODEL.NOT)
                Not = true;
            Operation = o.Operation;
            if (o.RParen == true)
            {
                OptionId = o.OptionId;
                QuestionId = o.QuestionId;
                Logic = o.Logic;
                OptionText = o.OptionText;

                opStack.Push(o.Operation);
                list.RemoveAt(0);
                return;
            }
            if (o.Operation == OPMODEL.NULL && list.Count == 1)
            {
                OptionId = o.OptionId;
                QuestionId = o.QuestionId;
                Logic = o.Logic;
                OptionText = o.OptionText;
                list.RemoveAt(0);
                return;
            }
        //    Operation = o.Operation;
            opStack.Push(Operation);
            if (o.LParen == true)
            {
                LChild = LChildTree(ref list);
                if (list == null || list.Count == 0)
                    return;
            }
            else
            {
                LChild = new DemographicTree();
                LChild.OptionId = o.OptionId;
                LChild.QuestionId = o.QuestionId;
                LChild.Logic = o.Logic;
                LChild.OptionText = o.OptionText;
                list.RemoveAt(0);
            }
            if (list.Count == 1 )
            {
                if (o.RParen == true)
                    LChild.Operation = o.Operation;
                o = list[0];
                RChild = new DemographicTree();
                RChild.OptionId = o.OptionId;
                RChild.QuestionId = o.QuestionId;
                RChild.Logic = o.Logic;
                RChild.OptionText = o.OptionText;
             //   Operation = opStack.Pop();
            }
            if (list.Count > 1)
            {
                Operation = opStack.Pop();
                RChild = new DemographicTree(list);
            }

        }

        public static bool EvaluateLogicalExpression(string logicalExpression)
        {
            System.Data.DataTable table = new System.Data.DataTable();
            table.Columns.Add("", typeof(bool));
            table.Columns[0].Expression = logicalExpression;

            System.Data.DataRow r = table.NewRow();
            table.Rows.Add(r);
            bool result = (Boolean)r[0];
            return result;
        }

        public long findFirstOptionId(DemographicTree dt)
        {
            if (dt.OptionId > 0)
                return dt.OptionId;
            if (dt.RChild != null)
                return findFirstOptionId(dt.RChild);
            if (dt.LChild != null)
                return findFirstOptionId(dt.LChild);
            return 0;
        }

        public  String toString(DemographicTree dt)
        {
            string s = "";
            if (dt == null) return s;
            using (var entities = new DIPEF.DataImportDB())
            {
                if (dt.Operation == OPMODEL.NULL && dt.QuestionId == 0)
                {
                    if (dt.OptionId > 0)
                        s = entities.Options.FirstOrDefault(O => O.OptionId == dt.OptionId).Name;
                    return s;
                }
                else if (dt.Operation == OPMODEL.NULL && dt.QuestionId > 0 && String.IsNullOrEmpty(dt.Logic))
                {
                    s = entities.Questions.FirstOrDefault(Q => Q.QuestionId == dt.QuestionId).Name;
                    return s;
                }
                else if (dt.QuestionId > 0 && !String.IsNullOrEmpty(dt.Logic))
                {
                    var q = entities.Questions.FirstOrDefault(Q => Q.QuestionId == dt.QuestionId);
                    s +=  string.Format(" {0} {1}", q.Name, dt.Logic);
                    if (dt.Operation == OPMODEL.NULL)
                        return s;
                }
                switch (dt.Operation)
                {
                    case OPMODEL.AND:
                        {
                            s += " " + toString(dt.LChild);
                            if (dt.RChild == null)
                                return s;
                            s += " " + toString(dt.RChild);
                            return s;
                        }
                    case OPMODEL.OR:
                        {
                            s += " " + toString(dt.LChild);
                            if (dt.RChild == null)
                                return s;
                            s += " " + toString(dt.RChild);
                            return s;
                        }
                    case OPMODEL.NOT:
                        {
                            s += " !" + toString(dt.LChild);
                            if (dt.RChild == null)
                                return s;
                            s += " " + toString(dt.RChild);
                            return s;
                        }
                    case OPMODEL.ORNOT:
                        {
                            s += " OR !" + toString(dt.LChild);
                            if (dt.RChild == null)
                                return s;
                            s += " " + toString(dt.RChild);
                            return s;
                        }

                }
            }
            return s;
        }
        public List<long> Evaluate(DemographicTree dt, bool NOT)
        {
            var respondents = Evaluate(dt);
            if (NOT == true)
            {
                var optionId = findFirstOptionId(dt);
                using (var entities = new DIPEF.DataImportDB())
                {
                    var option = entities.Options.FirstOrDefault(O => O.OptionId == optionId);
                    if (option != null)
                    {
                        var resp = entities.Responses.Where(R => R.QuestionId == option.QuestionId).Select(r => r.RespondentId).Distinct().ToList();
                        respondents = resp.Except(respondents).ToList();
                    }
                }
            }
            return respondents;
        }
        public List<long> Evaluate(DemographicTree dt)
        {           
            Console.WriteLine("{0}", dt.toString(dt));
            using (var entities = new DIPEF.DataImportDB())
            {
                if (dt.Operation == OPMODEL.NULL && dt.QuestionId == 0)
                {
                    var respondents = entities.Responses.Where(R=>R.OptionId == dt.OptionId).Select(r => r.RespondentId).Distinct().ToList();
                   
                    return respondents;
                }
                else if (dt.Operation == OPMODEL.NULL && dt.QuestionId > 0 && dt.OptionId==0 && String.IsNullOrEmpty(dt.Logic))
                {
                    var respondents = entities.Responses.Where(R => R.QuestionId == dt.QuestionId).Select(r => r.RespondentId).Distinct().ToList();
                    return respondents;
                }
               
                else if ( dt.QuestionId > 0 && !String.IsNullOrEmpty(dt.Logic))
                {
                    var respondents = new List<long>();
                    var responses = entities.Responses.Where(R => R.QuestionId == dt.QuestionId).ToList();
                    if (dt.Logic.Trim().ToUpper() == "N/A" || dt.Logic.ToLower().Trim() == "is null")
                        {
                        int projectId = responses.First().ProjectId;
                        List<long> allRespondents = entities.Respondents.Where(R => R.ProjectId == projectId).Select(A => A.RespondentId).ToList();
                        List<long> qRespondents = responses.Select(B => B.RespondentId).ToList();
                        respondents = allRespondents.Except(qRespondents).ToList();
                    }
                    else foreach(DIPEF.Response r in responses)
                    {
                       
                        var o = entities.Options.FirstOrDefault(O => O.OptionId == r.OptionId);

                        string expression = string.Format("{0} {1}", o.Value, dt.Logic);
                        if (Regex.IsMatch(dt.Logic, @"^[a-zA-Z]+$"))
                        {
                            if (o.Name.ToLower().Contains(dt.Logic.ToLower()))
                            {
                                respondents.Add(r.RespondentId);
                            }
                        }
                        else
                        {
                            if (EvaluateLogicalExpression(expression) == true)
                                if (!respondents.Contains(r.RespondentId))
                                    respondents.Add(r.RespondentId);
                        }

                    }
                    if (dt.Operation == OPMODEL.NULL)
                        return respondents;
                }
                Console.WriteLine(String.Format("{0} {1} {2}", dt.LChild == null ? "null":dt.LChild.toString(dt.LChild), dt.Operation, dt.RChild == null ? "null": dt.RChild.toString(dt.RChild)));
                switch (dt.Operation)
                {
                    case OPMODEL.AND:
                        {
                            var lResp = Evaluate(dt.LChild);
                            if (dt.RChild == null)
                                return lResp;                            
                            lResp = lResp.Intersect(Evaluate(dt.RChild)).ToList();
                            return lResp.Distinct().ToList();
                        }
                    case OPMODEL.OR:
                        {
                            var lResp = Evaluate(dt.LChild);
                            if (dt.RChild == null)
                                return lResp;
                            var rResp = Evaluate(dt.RChild);
                            var resp = lResp.Union(rResp).ToList();
                            return resp;
                        }
                    case OPMODEL.NOT:
                        {
                            var lResp = Evaluate(dt.LChild);
                            if (dt.RChild == null)
                                return lResp;
                            var rResp = Evaluate(dt.RChild);
                            
                            var resp = lResp.Except(rResp).ToList();                     
                            return resp;
                        }
                    case OPMODEL.ORNOT:
                        {
                            var lResp = Evaluate(dt.LChild);
                            if (dt.RChild == null)
                                return lResp;
                            var rResp = Evaluate(dt.RChild);
                            var resp = lResp.Except(rResp).ToList();
                            return resp;
                        }

                }
                return new List<long>();
            }
    }
}
}

Solution

  • OK - My original code was a hack and I forced myself to write a much more generic set expression evaluator. I still input an array of 'Expression' objects but very much less specific to my original attempt. Here is the result and how it gets used:

    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Text;
    
    namespace SetExpressions
    {
        public class Expression
        {
            public List<long> Value { get; set; }
    
            public char Op { get;set; }
            public bool lParen { get; set; }
            public bool rParen { get; set; }
          
            public Expression ( List<long> value, char op, bool lp, bool rp)
            {
                Op = op;
                Value = value;
                lParen = lp;
                rParen = rp;
            }
        }
        class SetExpressionEvaluator
        {
    
            Stack<List<long>> valueStack = new Stack<List<long>>();
            Stack<char> operatorStack = new Stack<char>();
            private void EvaluatePartial()
            {
                char op = operatorStack.Pop();
                while (op != '(')
                {
                    var right = valueStack.Pop();
                    var left = valueStack.Pop();
                    var newval = Apply(op, left, right);
                    valueStack.Push(newval);
                    op = operatorStack.Pop();
                }
            }
            public List<long> Apply(char op, List<long> left, List<long> right)
            {
                List<long> res = new List<long>();
                switch (op)
                {
                    case '&':
                        res = left.Intersect(right).ToList(); break;
                    case '|':
                        res = left.Union(right).ToList(); break;
                    case '!':
                        res = left.Except(right).ToList(); break;
                }
                return res;
            }
            public List<long> Evaluate(Expression [] expressions)
            {
                if (expressions.Length == 0)
                {
                    return new List<long>();
                }
                
                operatorStack.Clear();
                valueStack.Clear();
    
                foreach (Expression expression in expressions)
                {
                     valueStack.Push(expression.Value);                                                       
                    if (expression.lParen)
                    {
                        operatorStack.Push('(');
                    }
                    if (expression.Op != '\0')
                        operatorStack.Push(expression.Op);
                    if (expression.rParen)
                    {
                        EvaluatePartial();
                    }            
                }
    
                while (operatorStack.Count > 0)
                {
                    var right = valueStack.Pop();
                    var left = valueStack.Pop();
                    var newval = Apply(operatorStack.Pop(), left, right);
                    valueStack.Push(newval);
                }
                return valueStack.Pop();
            }      
        }
    }
    

    The Test Code:

    using SetExpressions;
    using System;
    using System.Collections.Generic;
    
    namespace ExpressionEvaluator
    {
        class Program
        {
            static void Main(string[] args)
            {
                SetExpressionEvaluator x = new SetExpressionEvaluator();
                var A = new List<long> { 1, 2, 3 };
                var B = new List<long> { 3, 4, 5 };
                var C = new List<long> { 5, 6, 7 };
    
                // A & B
                List<Expression> list = new List<Expression>();
                list.Add(new Expression(A, '&', false, false));
                list.Add(new Expression(B, '\0', false, false));
                var X = x.Evaluate(list.ToArray());
                Console.WriteLine(String.Join(',', X));
                list = new List<Expression>();
                list.Add(new Expression(A, '&', false, false));
                list.Add(new Expression(B, '|', true, false));
                list.Add(new Expression(C, '\0', false, true));
                X = x.Evaluate(list.ToArray());
                Console.WriteLine(String.Join(',', X));
            }
        }
    

    }