I had a scaling problem in a production environment. A small numbers of records processed fine. But larger volumes took exponentially longer. These classes are just to illustrate the point.
Party {
DateTime When { get; set; }
ICollection<Attendee> Attendees { get; set; }
}
Attendee {
ICollection<Gift> Gifts { get; set; }
ICollection<Allergy> Allergies { get; set; }
}
IEnumerable<Party> GetAllPartiesByDate(DateTime date) {
var parties = Context.Parties
.Include(p => p.Attendees).ThenInclude(a => a.Gifts)
.Include(p => p.Attendees).ThenInclude(a => a.Allergies)
.Where(p.When == date)
.ToList();
return parties;
}
With 4 matching parties, and 7 attendees at each party, where each attendee has 3 gifts and 2 allergies
That's 172 rows in the DB across 4 tables
4 + (4*7) + (4*7*3) + (4*7*2)
EF returns that via a single SQL query with 168 rows, not too bad.
4 * (7) * (3) * (2)
But make each of those values just 10 times larger and you get 142,840 rows in the DB
40 + (40*70) + (40*70*30) + (40*70*20)
But the results set from the singe EF query explodes on each successive one to many relationship and tries to return 40 * 70 * 30 * 20
1,680,000 rows
When using more modern versions of Entity Framework Core the solution to a Cartesian Explosion is to use AsSplitQuery
For unfortunate technical reasons we cannot update Entity Framework Core past version 3.1.
But how can you implement AsSplitQuery in EF Core 3.1?
The solution is to:
Remember AutoDetectChangesEnabled is not the same mechanism as QueryTrackingBehavior.NoTracking aka AsNoTracking
/// <summary>
/// The strategy for split loading when you know the second order volume is low.
/// I.e. We know there are lots of parties and attendees, but each attendee has a low number of gifts and allergies.
/// This loads parties and then everything else over two SQL queries
/// </summary>
IEnumerable<Party> GetAllPartiesByDate(DateTime date)
{
try
{
//Turn off change tracking, note this is NOT the same as AsNoTracking
Context.ChangeTracker.AutoDetectChangesEnabled = false;
//Keep the search condition.
IQueryable<Party> partyQuery = Context.Parties.Where(p.When == date);
//For relationships where you know the count is small you can still use includes
//Thus producing a single SQL round trip.
//But projections do not honour includes so you must load from the DbSet via projected IDs instead.
var partyIDsQuery = partyQuery.Select(p => new { MatchPartyId = p.Id });
//Join to projected IDs
var attendeesQuery = Context.Attendees.Join(partyIDsQuery, a => a.PartyId, i => i.MatchPartyId, (a,i) => a)
.Include(a => a.Allergies)
.Include(a => a.Gifts);
//Get it into memory
var allAttendees = attendeesQuery.ToList();
//C# group by not SQL group by
var allAttendeesLookup = allAttendees.GroupBy(a => a.PartyId).ToDictionary(g => g.Key, g => new List<Attendee>(g));
//Triger an SQL call with ToList, this creates instances of Party.
List<Party> parties = partyQuery.ToList();
foreach(var party in parties)
{
//Assign the loaded list
if(allAttendeesLookup.TryGetValue(party.Id, out var attendees))
party.Attendees = attendees;
else
party.Attendees = new List<Attendee>(0);
//Tell EF you have loaded it
Context.Entry<Party>(party).Collection(p => p.Attendees).IsLoaded = true;
}
return parties;
}
finally
{
Context.ChangeTracker.AutoDetectChangesEnabled = true;
}
}
/// <summary>
/// The strategy for split loading when you know the volume is high for every relationship.
/// This loads parties and then each relationship via sperate SQL queries.
/// </summary>
IEnumerable<Party> GetAllPartiesByDate(DateTime date)
{
try
{
//Turn off change tracking, note this is NOT the same as AsNoTracking
Context.ChangeTracker.AutoDetectChangesEnabled = false;
//Keep the search condition.
IQueryable<Party> partyQuery = Context.Parties.Where(p.When == date);
//Load just the attendees via projection as no includes are being used and it's a top level relationship
var allAttendeesGroup = partyQuery.Select(p => new { p.Id, d.Attendees });
var allAttendeesLookup = allAttendeesGroup.ToDictionary(e => e.Id, e => e);
//Load party
List<Party> parties = partyQuery.ToList();
//Get link IDs for leaves
var partyIDsQuery = partyQuery.Select(p => new { MatchPartyId = p.Id });
var attendeeIDsQuery = Context.Attendees.Join(partyIDsQuery, a => a.PartyId, i => i.MatchPartyId, (a,i) => a)
.Select(a => a. new { MatchAttendeeId = a.Id });
//Load leaves and make lookups in mem
var allGiftsQuery = Context.Gifts.Join(attendeeIDsQuery, g => g.AttendeeId, i => i.MatchAttendeeId, (g,i) => g);
var allGifts = allGiftsQuery.ToList();
var allGiftsLookUp = allGifts.GroupBy(g => g.AttendeeId).ToDictionary(g => g.Key, g => new List<Gift>(g));
var allAllergysQuery = Context.Allergys.Join(attendeeIDsQuery, a => a.AttendeeId, i => i.MatchAttendeeId, (a,i) => a);
var allAllergys = allAllergysQuery.ToList();
var allAllergysLookUp = allAllergys.GroupBy(a => a.AttendeeId).ToDictionary(g => g.Key, g => new List<Allergy>(g));
//Connect everything
foreach(var party in parties)
{
//Assign the loaded list
if(allAttendeesLookup.TryGetValue(party.Id, out var attendees))
party.Attendees = attendees;
else
party.Attendees = new List<Attendee>(0);
//Tell EF you have loaded it
Context.Entry<Party>(party).Collection(p => p.Attendees).IsLoaded = true;
//Connect leaves
foreach(var attendee in party.Attendees)
{
if(allGiftsLookUp.TryGetValue(attendee.Id, out var gifts))
attendee.Gifts = gifts;
else
attendee.Gifts = new List<Gift>(0);
Context.Entry<Attendee>(attendee).Collection(a => a.Gifts).IsLoaded = true;
if(allAllergysLookUp.TryGetValue(attendee.Id, out var allergys))
attendee.Allergies = allergys;
else
attendee.Allergies = new List<Allergy>(0);
Context.Entry<Attendee>(attendee).Collection(a => a.Allergies).IsLoaded = true;
}
}
return parties;
}
finally
{
Context.ChangeTracker.AutoDetectChangesEnabled = true;
}
}
You will likely need to use a mixture of the two strategies depending on the expected volumes for each relationship you are dealing with.