Search code examples
c# · linq · functional-programming

Splitting a list of files into groups by a maximum sum


The following code joins a list of reports with report receivers. A report receiver can have a long list of reports, so we need to check the maximum attachment size and create a new mail whenever we reach the limit.

I would like to write the foreach (var rpt in receiver.Reports) loop in a more elegant way, maybe using LINQ, so that I can continue the chain after the GroupBy call.

/// <summary>
/// Joins reports with their receivers and builds the mails to send. The reports of
/// each receiver e-mail address are spread over several mails so that the summed
/// attachment size of one mail stays within <paramref name="maxAttachmentSizeInBytes"/>.
/// </summary>
/// <param name="reports">All reports that are candidates for sending.</param>
/// <param name="reportReceivers">Receivers; matched to reports on ReportTypeId and FundId.</param>
/// <param name="maxAttachmentSizeInBytes">Upper bound for the summed report size of one mail, in bytes.</param>
/// <returns>
/// The mails to send, and the reports that were not matched to any receiver.
/// </returns>
/// <remarks>
/// A single report that is larger than the limit cannot be split; it is placed alone
/// in its own mail, which therefore exceeds the limit.
/// </remarks>
internal static (List<MailToSend> mailToSends, List<Report> reportsNotUsed) CreateMailsWithReports(
    IList<Report> reports,
    IEnumerable<ReportReceiver> reportReceivers,
    long maxAttachmentSizeInBytes)
{
    // One entry per receiver e-mail address, holding every report matched to it.
    var mails = reportReceivers.Join(reports,
            outerKeySelector: receiver => new { receiver.ReportTypeId, receiver.FundId },
            innerKeySelector: report => new { report.ReportTypeId, report.FundId },
            resultSelector: (receiver, report) => new
            {
                Receiver = receiver,
                Report = report
            })
        .GroupBy(x => x.Receiver.Email, x => x.Report)
        .Select(g => new MailToSend { CustomerEmail = g.Key, Reports = g.ToList() })
        .ToList();

    var mailToSends = new List<MailToSend>();
    foreach (var receiver in mails)
    {
        long byteUsed = 0;
        var mail = new MailToSend { CustomerEmail = receiver.CustomerEmail };
        mailToSends.Add(mail);

        foreach (var rpt in receiver.Reports)
        {
            var exceedsLimit = byteUsed + rpt.SizeInBytes > maxAttachmentSizeInBytes;

            // Open a new mail only when the current one already carries reports.
            // Otherwise a first report that is larger than the limit would leave
            // an empty mail behind in the result.
            if (exceedsLimit && mail.Reports.Count > 0)
            {
                mail = new MailToSend { CustomerEmail = receiver.CustomerEmail };
                mailToSends.Add(mail);
                byteUsed = 0;
            }

            mail.Reports.Add(rpt);
            byteUsed += rpt.SizeInBytes;
        }
    }

    // warn about reports without receivers
    var reportsNotUsed = reports.Except(mails.SelectMany(x => x.Reports)).ToList();

    return (mailToSends, reportsNotUsed);
}

Solution

  • I don't think that you can write much more readable code; I would argue that the problem itself is more idiomatically solved in C# in an imperative style. One small thing I can suggest is moving the List<Report> split into a separate method and performing the split before creating the mails. For example (not tested):

    /// <summary>
    /// Joins reports with their receivers and builds the mails to send. The reports of
    /// each receiver e-mail address are chunked so that the summed attachment size of
    /// one mail stays within <paramref name="maxAttachmentSizeInBytes"/>.
    /// </summary>
    /// <param name="reports">All reports that are candidates for sending.</param>
    /// <param name="reportReceivers">Receivers; matched to reports on ReportTypeId and FundId.</param>
    /// <param name="maxAttachmentSizeInBytes">Upper bound for the summed report size of one mail, in bytes.</param>
    /// <returns>
    /// The mails to send, and the reports that were not matched to any receiver.
    /// </returns>
    /// <remarks>
    /// A single report that is larger than the limit cannot be split; it is placed alone
    /// in its own mail, which therefore exceeds the limit.
    /// </remarks>
    internal static (List<MailToSend> mailToSends, List<Report> reportsNotUsed) CreateMailsWithReports(
        IList<Report> reports,
        IEnumerable<ReportReceiver> reportReceivers,
        long maxAttachmentSizeInBytes)
    {
        var mailToSends = reportReceivers.Join(reports,
                outerKeySelector: receiver => new { receiver.ReportTypeId, receiver.FundId },
                innerKeySelector: report => new { report.ReportTypeId, report.FundId },
                resultSelector: (receiver, report) => new
                {
                    Receiver = receiver,
                    Report = report
                })
            .GroupBy(x => x.Receiver.Email, x => x.Report)
            .SelectMany(g => ChunkByMaxSize(g)
                .Select(chunk => new MailToSend
                {
                    CustomerEmail = g.Key,
                    Reports = chunk
                }))
            .ToList();

        // warn about reports without receivers
        var reportsNotUsed = reports.Except(mailToSends.SelectMany(x => x.Reports)).ToList();

        return (mailToSends, reportsNotUsed);

        // Splits one receiver's reports, in order, into consecutive chunks whose summed
        // size does not exceed maxAttachmentSizeInBytes. Never yields an empty chunk.
        // The parameter is deliberately not named "reports" so it does not shadow the
        // enclosing method's parameter.
        IEnumerable<List<Report>> ChunkByMaxSize(IEnumerable<Report> receiverReports)
        {
            var chunk = new List<Report>();
            long byteUsed = 0;

            foreach (var rpt in receiverReports)
            {
                var exceedsLimit = byteUsed + rpt.SizeInBytes > maxAttachmentSizeInBytes;

                // Close the current chunk only if it holds something; an oversized
                // first report must not produce an empty chunk (and so an empty mail).
                if (exceedsLimit && chunk.Count > 0)
                {
                    yield return chunk;
                    chunk = new List<Report>();
                    byteUsed = 0;
                }

                chunk.Add(rpt);
                byteUsed += rpt.SizeInBytes;
            }

            // Emit the trailing chunk, unless the input was empty.
            if (chunk.Count > 0)
            {
                yield return chunk;
            }
        }
    }