Please read the question carefully before voting to close it. That is not a duplicate.
I am trying to build a generic method that returns list of entities of type T joined to logs of type AuditLog. Here is a LEFT JOIN interpretation in LINQ that I use
var result = from entity in entitySet
from auditLog in auditLogSet.Where(joinExpression).DefaultIfEmpty()
select new { entity, auditLog };
return result.GroupBy(item => item.entity)
.Select(group => new
{
Entity = group.Key,
Logs = group.Where(i => i.auditLog != null).Select(i => i.auditLog)
});
The problem is in joinExpression. I want to pass it to the WHERE clause, but it is different for different concrete types T (it is dependent on the entity variable), for example for a specific entity it could be
joinExpression = l => l.TableName == "SomeTable" && l.EntityId == entity.SomeTableId;
Notice entity.SomeTableId above. This is the reason I can't initialize joinExpression before the beginning of the query. How can I pass joinExpression as a parameter if it is actually dependent on "entity" variable, which is part of the query itself?
Your method may read something like this:
IQueryable<dynamic> GetEntities<T>(IDbSet<T> entitySet, Expression<Func<T, IEnumerable<AuditLog>>> joinExpression) where T : class
{
var result = entitySet.SelectMany(joinExpression,(entity, auditLog) => new {entity, auditLog});
return result.GroupBy(item => item.entity)
.Select(group => new
{
Entity = group.Key,
Logs = group.Where(i => i.auditLog != null).Select(i => i.auditLog)
});
}
And then you call it like this:
Expression<Func<SomeEntity, IEnumerable<AuditLog>>> ddd = entity => auditLogSet.Where(a => a.TableName == "SomeEntity" && entity.Id == a.EntityId).DefaultIfEmpty();
var result = GetEntities(entitySet, ddd).ToList();
I do not really see how this is different from the duplicate that I linked, in both cases you pass your query as an expression. Obviously you need to pass the query with all the dependencies, so you need your entity
value be a part of it.
Here is a self-contained working example:
using System;
using System.Collections.Generic;
using System.ComponentModel.DataAnnotations.Schema;
using System.Data.Entity;
using System.Data.Entity.ModelConfiguration;
using System.Linq;
using System.Linq.Expressions;
namespace SO24542133
{
public class AuditLog
{
public int Id { get; set; }
public string TableName { get; set; }
public int? EntityId { get; set; }
public string Text { get; set; }
}
public class SomeEntity
{
public int Id { get; set; }
public string Something { get; set; }
}
internal class AuditLogConfiguration : EntityTypeConfiguration<AuditLog>
{
public AuditLogConfiguration()
{
ToTable("dbo.AuditLog");
HasKey(x => x.Id);
Property(x => x.Id).HasColumnName("Id").IsRequired().HasDatabaseGeneratedOption(DatabaseGeneratedOption.Identity);
Property(x => x.TableName).HasColumnName("TableName").IsOptional().HasMaxLength(50);
Property(x => x.EntityId).HasColumnName("EntityId").IsOptional();
Property(x => x.Text).HasColumnName("Text").IsOptional();
}
}
internal class SomeEntityConfiguration : EntityTypeConfiguration<SomeEntity>
{
public SomeEntityConfiguration()
{
ToTable("dbo.SomeEntity");
HasKey(x => x.Id);
Property(x => x.Id).HasColumnName("Id").IsRequired().HasDatabaseGeneratedOption(DatabaseGeneratedOption.Identity);
Property(x => x.Something).HasColumnName("Something").IsOptional();
}
}
public interface IMyDbContext : IDisposable
{
IDbSet<AuditLog> AuditLogSet { get; set; }
IDbSet<SomeEntity> SomeEntitySet { get; set; }
int SaveChanges();
}
public class MyDbContext : DbContext, IMyDbContext
{
public IDbSet<AuditLog> AuditLogSet { get; set; }
public IDbSet<SomeEntity> SomeEntitySet { get; set; }
static MyDbContext()
{
Database.SetInitializer(new DropCreateDatabaseAlways<MyDbContext>());
}
public MyDbContext(string connectionString) : base(connectionString)
{
}
protected override void OnModelCreating(DbModelBuilder modelBuilder)
{
base.OnModelCreating(modelBuilder);
modelBuilder.Configurations.Add(new AuditLogConfiguration());
modelBuilder.Configurations.Add(new SomeEntityConfiguration());
}
}
class Program
{
private static void CreateTestData(MyDbContext context)
{
SomeEntity e1 = new SomeEntity { Something = "bla" };
SomeEntity e2 = new SomeEntity { Something = "another bla" };
SomeEntity e3 = new SomeEntity { Something = "third bla" };
context.SomeEntitySet.Add(e1);
context.SomeEntitySet.Add(e2);
context.SomeEntitySet.Add(e3);
context.SaveChanges();
AuditLog a1 = new AuditLog { EntityId = e1.Id, TableName = "SomeEntity", Text = "abc" };
AuditLog a2 = new AuditLog { EntityId = e1.Id, TableName = "AnotherTable", Text = "def" };
AuditLog a3 = new AuditLog { EntityId = e1.Id, TableName = "SomeEntity", Text = "ghi" };
AuditLog a4 = new AuditLog { EntityId = e2.Id, TableName = "SomeEntity", Text = "jkl" };
context.AuditLogSet.Add(a1);
context.AuditLogSet.Add(a2);
context.AuditLogSet.Add(a3);
context.AuditLogSet.Add(a4);
context.SaveChanges();
}
static IQueryable<dynamic> GetEntities<T>(IDbSet<T> entitySet, Expression<Func<T, IEnumerable<AuditLog>>> joinExpression) where T : class
{
var result = entitySet.SelectMany(joinExpression,(entity, auditLog) => new {entity, auditLog});
return result.GroupBy(item => item.entity)
.Select(group => new
{
Entity = group.Key,
Logs = group.Where(i => i.auditLog != null).Select(i => i.auditLog)
});
}
static void Main()
{
MyDbContext context = new MyDbContext("Data Source=(local);Initial Catalog=SO24542133;Integrated Security=True;");
CreateTestData(context);
Expression<Func<SomeEntity, IEnumerable<AuditLog>>> ddd = entity => context.AuditLogSet.Where(a => a.TableName == "SomeEntity" && entity.Id == a.EntityId).DefaultIfEmpty();
var result = GetEntities(context.SomeEntitySet, ddd).ToList();
// Examine results here
result.ToString();
}
}
}
And to address a point that is raised in another answer regarding DefaultIfEmpty
. The call to DefaultIfEmpty
is just a node on expression tree that you end up with in the ddd
variable. You don't have to include it in this expression tree and instead add it dynamically in your GetEntites
method to the expression tree that you receive as a parameter.
EDIT:
To touch on other issues with the code, it is correct, that the sql generated by this query is less then optimal. Particularly bad thing about it is that we first flatten the join with SelectMany
and then un-flatten it back again with GroupBy
. This does not make much sense. Let's see how we can improve that. First, let's get rid of this dynamic nonsense. Our result set item can be defined like this:
class QueryResultItem<T>
{
public T Entity { get; set; }
public IEnumerable<AuditLog> Logs { get; set; }
}
Good. Now let's rewrite our EF query so that it does not flatten and then groups by. Let's start simple and come up with a non-generic implementation, we will improve that later. Our query can look something like this:
static IQueryable<QueryResultItem<SomeEntity>> GetEntities(IDbSet<SomeEntity> entitySet, IDbSet<AuditLog> auditLogSet)
{
return entitySet.Select(entity =>
new QueryResultItem<SomeEntity>
{
Entity = entity,
Logs = auditLogSet.Where(a => a.TableName == "SomeEntity" && entity.Id == a.EntityId)
});
}
Nice and clean. Now let's see what we need to do to make it work with any entity. First of all let's make the expression itself easier to manipulate by pulling it in a separate variable like this:
static IQueryable<QueryResultItem<SomeEntity>> GetEntities(IDbSet<SomeEntity> entitySet, IDbSet<AuditLog> auditLogSet)
{
Expression<Func<SomeEntity, QueryResultItem<SomeEntity>>> entityExpression = entity =>
new QueryResultItem<SomeEntity>
{
Entity = entity,
Logs = auditLogSet.Where(a => a.TableName == "SomeEntity" && entity.Id == a.EntityId)
};
return entitySet.Select(entityExpression);
}
We obviously need to be able to pass the where expression from somewhere, so let's separate this part to a variable as well:
static IQueryable<QueryResultItem<T>> GetEntities<T>(IDbSet<T> entitySet, IDbSet<AuditLog> auditLogSet, Expression<Func<AuditLog, T, bool>> whereTemplate) where T : class
{
Expression<Func<AuditLog, bool>> whereExpression = null;
Expression<Func<T, QueryResultItem<T>>> entityExpression = entity =>
new QueryResultItem<T>
{
Entity = entity,
Logs = auditLogSet.Where(whereExpression)
};
whereExpression = SubstituteSecondParameter(whereTemplate, entityExpression.Parameters[0]);
return entitySet.Select(entityExpression);
}
So now where expression is in a separate variable, but we also took a chance to do a few other changes as well. Our method now is generic again, so it can accept any entity. Also note that we are passing a where template in but it has an extra generic parameter, which substitutes for the entity
variable that we are dependent on. Since type is different we can't use this template directly in our expression, so we need some way to translate it into where expression that we can use: the mysterious SubstituteSecondParameter method represents this. The last thing to note about this piece of code, that we are assigning the result of the substitution back to the variable that we used above in our expression. Will this work? Well, yes. The expression represents an anonymous method and by the merit of it lifts local variables and parameters to form a closure. If you have ReSharper you will notice that it warns you that the whereExpression
variable gets modified after it was lifted. In most cases this is unintentional, but in our case this is exactly what we want to do, substitute the temporary whereExpression to the real one.
Next step is to consider what we are going to pass to our method. This is simple:
Expression<Func<AuditLog, SomeEntity, bool>> whereExpression2 = (l, entityParam) => l.TableName == "SomeEntity" && l.EntityId == entityParam.Id;
This will work out nicely. Now the last piece of the puzzle, how do we convert this expression with an extra parameter to the expression which has this parameter inside it. Well the bad news is you can't modify expression trees you have to re-build them from scratch. The good news, that Marc can help us here. First, let's define a simple Expression Visitor class it is based on what is already implemented in the BCL and looks simple:
class ExpressionSubstitute : ExpressionVisitor
{
private readonly Expression _from;
private readonly Expression _to;
public ExpressionSubstitute(Expression from, Expression to)
{
_from = from;
_to = to;
}
public override Expression Visit(Expression node)
{
return node == _from ? _to : base.Visit(node);
}
}
All we have is a constructor that tells us what node to substitute with what node, and an override to do the check / substitution. The SubstituteSecondParameter
is also is not very complicated, it's a two liner:
static Expression<Func<AuditLog, bool>> SubstituteSecondParameter<T>(Expression<Func<AuditLog, T, bool>> expression, ParameterExpression parameter)
{
ExpressionSubstitute swapParam = new ExpressionSubstitute(expression.Parameters[1], parameter);
return Expression.Lambda<Func<AuditLog, bool>>(swapParam.Visit(expression.Body), expression.Parameters[0]);
}
Look at the signature, we take an expression with two parameters and a parameter and return an expression with only one parameter. To do this we create out visitor passing it our second parameter as "to" and the method parameter argument as "from", and then construct a new Lambda Expression that only has a single parameter, which we take from the original expression. And that concludes it. To put our changes together these are the new classes/methods:
class QueryResultItem<T>
{
public T Entity { get; set; }
public IEnumerable<AuditLog> Logs { get; set; }
}
class ExpressionSubstitute : ExpressionVisitor
{
private readonly Expression _from;
private readonly Expression _to;
public ExpressionSubstitute(Expression from, Expression to)
{
_from = from;
_to = to;
}
public override Expression Visit(Expression node)
{
return node == _from ? _to : base.Visit(node);
}
}
static Expression<Func<AuditLog, bool>> SubstituteSecondParameter<T>(Expression<Func<AuditLog, T, bool>> expression, ParameterExpression parameter)
{
ExpressionSubstitute swapParam = new ExpressionSubstitute(expression.Parameters[1], parameter);
return Expression.Lambda<Func<AuditLog, bool>>(swapParam.Visit(expression.Body), expression.Parameters[0]);
}
static IQueryable<QueryResultItem<T>> GetEntities2<T>(IDbSet<T> entitySet, IDbSet<AuditLog> auditLogSet, Expression<Func<AuditLog, T, bool>> whereTemplate) where T : class
{
Expression<Func<AuditLog, bool>> whereExpression = null;
Expression<Func<T, QueryResultItem<T>>> entityExpression = entity =>
new QueryResultItem<T>
{
Entity = entity,
Logs = auditLogSet.Where(whereExpression)
};
whereExpression = SubstituteSecondParameter(whereTemplate, entityExpression.Parameters[0]);
return entitySet.Select(entityExpression);
}
And this is how we call them:
Expression<Func<AuditLog, SomeEntity, bool>> whereExpression2 = (l, entityParam) => l.TableName == "SomeEntity" && l.EntityId == entityParam.Id;
var r2 = GetEntities2(context.SomeEntitySet, context.AuditLogSet, whereExpression2).ToList();
Much better!
And one last thing. This is the SQL generated by EF as the result of this query. As you can see it's very simple and readable (at least as far as EF generated sql goes):
SELECT
[Project1].[Id] AS [Id],
[Project1].[Something] AS [Something],
[Project1].[C1] AS [C1],
[Project1].[Id1] AS [Id1],
[Project1].[TableName] AS [TableName],
[Project1].[EntityId] AS [EntityId],
[Project1].[Text] AS [Text]
FROM ( SELECT
[Extent1].[Id] AS [Id],
[Extent1].[Something] AS [Something],
[Extent2].[Id] AS [Id1],
[Extent2].[TableName] AS [TableName],
[Extent2].[EntityId] AS [EntityId],
[Extent2].[Text] AS [Text],
CASE WHEN ([Extent2].[Id] IS NULL) THEN CAST(NULL AS int) ELSE 1 END AS [C1]
FROM [dbo].[SomeEntity] AS [Extent1]
LEFT OUTER JOIN [dbo].[AuditLog] AS [Extent2] ON (N'SomeEntity' = [Extent2].[TableName]) AND ([Extent2].[EntityId] = [Extent1].[Id])
) AS [Project1]
ORDER BY [Project1].[Id] ASC, [Project1].[C1] ASC