In the following code I am building a dynamic IQueryable where clause. The second line takes 100s of ms to execute. Is there a way to speed it up besides caching?
var lambda = Expression.Lambda<Func<T, bool>>(expression, _parameter);
var whereQuery = queryable.Where(lambda);
Where expression is a BinaryExpression and _parameter is a MemberExpression and queryable is an IQueryable.
EDIT
Here is the full code. It is a class that takes a string and converts it into a lambda expression.
public class StringToWhereConverter<T>
{
abstract class ClauseElement { }
class Clause : ClauseElement
{
public string FieldName { get; set; }
public string Operator { get; set; }
public string Value { get; set; }
public override string ToString()
{
return FieldName + " " + Operator + " " + Value;
}
}
class Connector : ClauseElement
{
public string AndOrOr { get; set; }
}
class ExpressionClause : ClauseElement
{
public BinaryExpression BinaryExpression { get; set; }
}
private ParameterExpression _parameter;
private Dictionary<string, PropertyInfo> _propertyLookup;
public StringToWhereConverter()
{
_parameter = Expression.Parameter(typeof(T), "e");
PropertyInfo[] propertyInfos = typeof(T).GetProperties();
_propertyLookup = propertyInfos.ToDictionary(p => p.Name.ToLower());
}
public IQueryable<T> WhereQuery(IQueryable<T> queryable, string whereStatement)
{
if (string.IsNullOrWhiteSpace(whereStatement))
return queryable;
var where = whereStatement.Trim().ToLower();
var words = SeparateBySpaces(where);
var clauses = CreateIntoClauses(words);
var expressions = CreateIntoExpressions(clauses);
var expression = CreateIntoSingleExpression(expressions);
var lambda = Expression.Lambda<Func<T, bool>>(expression, _parameter);
var watch = Stopwatch.StartNew();
var whereQuery = queryable.Where(lambda);
watch.Stop();
var elapsedMs = watch.ElapsedMilliseconds;
return whereQuery;
}
private List<string> SeparateBySpaces(string clause)
{
var words = clause.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).ToList();
words.ForEach(w => w.Trim());
if (IsWordWhere(words[0]))
words.RemoveAt(0);
List<string> clearerWords = new List<string>();
foreach (var word in words)
{
if (IsWordValidField(word))
{
clearerWords.Add(word);
continue;
}
if (IsWordContainingOperator(word))
{
var clearerWord = word;
var indexOfOperator = clearerWord.IndexOfAny(new char[] { '>', '<', '=', '!' });
if (indexOfOperator != 0) // if not at beginning, add space in front
{
clearerWord = clearerWord.Insert(indexOfOperator, " ");
indexOfOperator++;
}
// test soemthing like feld>>>value or field >= >value
if (indexOfOperator + 1 != clearerWord.Length) // if not at end, add space depending
{
int indexOfLastOperator = indexOfOperator + 1;
var isCharAnOperator = true;
do
{
isCharAnOperator = indexOfLastOperator != clearerWord.Length && IsWordContainingOperator(clearerWord[indexOfLastOperator].ToString());
if (isCharAnOperator)
{
indexOfLastOperator++;
}
else
{
if (indexOfLastOperator == clearerWord.Length) continue;
clearerWord = clearerWord.Insert(indexOfLastOperator, " "); // add a space after the operator
}
} while (isCharAnOperator);
}
var clearWordsWithOperator = clearerWord.Split(' ').ToList();
clearWordsWithOperator.ForEach(c => clearerWords.Add(c));
}
}
return clearerWords;
}
private List<ClauseElement> CreateIntoClauses(List<string> words)
{
var originalWords = new List<string>(words);
var clauseElements = new List<ClauseElement>();
var dontExitLoop = true;
do
{
var subclause = words.Take(3).ToList();
if (subclause.Count != 3)
{
var message = "A syntax error occurred. The phrase '" + string.Join(" ", originalWords) + "' is an invalid where clause.";
throw new SyntaxException(message);
}
bool isValid = IsWordValidField(subclause[0])
&& IsWordValidOperator(subclause[1]);
if (isValid)
{
var clauseelement = new Clause()
{
FieldName = subclause[0],
Operator = subclause[1],
Value = subclause[2],
};
clauseElements.Add(clauseelement);
}
else
{
var message = "A syntax error occurred. The phrase '" + string.Join(" ", words) + "' is an invalid where clause.";
throw new SyntaxException(message);
}
words.RemoveRange(0, 3);
if (words.Count != 0)
{
if (IsWordValidConnector(words[0]))
{
clauseElements.Add(new Connector() { AndOrOr = words[0] });
words.RemoveAt(0);
}
else
{
var message = "Expected 'and' or 'or' in the clause instead of '" + words[0] +"'";
throw new SyntaxException(message);
}
}
else
{
dontExitLoop = false;
}
} while (dontExitLoop);
return clauseElements;
}
private List<ClauseElement> CreateIntoExpressions(List<ClauseElement> clauses)
{
var expressions = new List<ClauseElement>();
foreach(var clause in clauses)
{
if (clause is Clause)
{
var cl = clause as Clause;
var expressionClause = new ExpressionClause();
expressionClause.BinaryExpression = ConvertClauseToExpression(cl, _parameter);
expressions.Add(expressionClause);
}
else
{
expressions.Add(clause);
}
}
return expressions;
}
private BinaryExpression CreateIntoSingleExpression(List<ClauseElement> expressionClauses)
{
if (expressionClauses.Count <= 2)
return (expressionClauses[0] as ExpressionClause).BinaryExpression;
BinaryExpression expression;
var dontExitLoop = true;
do
{
var sub = expressionClauses.Take(3).ToList();
var comparer = (sub[1] as Connector).AndOrOr;
expression = (sub[0] as ExpressionClause).BinaryExpression;
var right = (sub[2] as ExpressionClause).BinaryExpression;
expression = CreateBinaryExpression(comparer, expression, right);
expressionClauses.RemoveRange(0, 3);
expressionClauses.Insert(0, new ExpressionClause() { BinaryExpression = expression });
dontExitLoop = expressionClauses.Count > 1;
} while (dontExitLoop);
return expression as BinaryExpression;
}
private BinaryExpression ConvertClauseToExpression(Clause clause, Expression parameter)
{
var property = TryGetLookupValue(clause.FieldName);
var convertedValue = Convert.ChangeType(clause.Value, property.PropertyType);
var accessor = Expression.MakeMemberAccess(parameter, property);
var constant = Expression.Constant(convertedValue);
var comparison = CreateBinaryExpression(clause.Operator, accessor, constant);
return comparison;
}
private BinaryExpression CreateBinaryExpression(string comparer, Expression left, Expression right)
{
switch (comparer)
{
case ">":
return Expression.GreaterThan(left, right);
case "<":
return Expression.LessThan(left, right);
case ">=":
return Expression.GreaterThanOrEqual(left, right);
case "<=":
return Expression.LessThanOrEqual(left, right);
case "=":
return Expression.Equal(left, right);
case "<>":
case "!=":
return Expression.NotEqual(left, right);
case "and":
return Expression.AndAlso(left, right);
case "or":
return Expression.OrElse(left, right);
default:
throw new Exception();
}
}
private bool IsWordWhere(string word)
{
return word == "where";
}
private bool IsWordContainingOperator(string word)
{
return Regex.IsMatch(word, "<|>|=|!");
}
private bool IsWordValidField(string word)
{
return !Regex.IsMatch(word, "<|>|=|!");
}
private bool IsWordValidOperator(string word)
{
var isValid = word == "="
|| word == ">"
|| word == "<"
|| word == ">="
|| word == "<="
|| word == "!="
|| word == "<>";
return isValid;
}
private bool IsWordValidConnector(string word)
{
var lower = word.ToLower();
var IsValid = lower == "and"
|| lower == "or";
return IsValid;
}
private PropertyInfo TryGetLookupValue(string key)
{
if (_propertyLookup.ContainsKey(key))
{
return _propertyLookup[key];
}
else
{
var message = "A syntax error occurred. The field '" + key + "' is an invalid field name.";
throw new SyntaxException(message);
}
}
}
It can then be used with an IQueryable like this:
string clause = "invoiceid = 2 ";
var db = new ApLineContext();
var stringToWhereConverter = new StringToWhereConverter<ApLine>();
var query = stringToWhereConverter.WhereQuery(db.ApLines, clause);
var results = query.ToList();
EDIT 2 - PROFILER
If you need to see further detail I can add screen shots. I only opened the tree up until I found the problem at 1 hit.
Profiling shows that most time is spent in something called get_Provider
. My guess is that this is Entity Framework one-time initialization. You seem to have a bad profiler software because it does not resolve all assembly names.
Throw the first timing test away, then measure 1000000 more iterations. This should give you the steady-state performance of that piece of code.