Search code examples
c#linqdistinctlinq-to-dataset

How to sum a field over distinct values of a column other than the group-by column


Below is the C# code.

The problem is the calculated SumCustomerDebt value.

I need to group by CustomerLocation, but CustomerDebt is repeated in the table on every row belonging to the same customer. I need to calculate SumCustomerDebt as the sum of CustomerDebt counted once per distinct CustomerID. How can I do that?

/// <summary>
/// Builds the in-memory sample sales table used by the query examples.
/// </summary>
/// <returns>
/// A <see cref="DataTable"/> with the columns salesId (int), CustomerLocation (string),
/// CustomerID (int), CustomerDebt (int) and SubTotal (int), populated with four sample rows.
/// </returns>
protected DataTable generateData()
{
    DataTable dt = new DataTable();

    // typeof(...) is checked by the compiler; Type.GetType("System.Int32") is a
    // string lookup that only fails at runtime (it returns null for a misspelled name).
    dt.Columns.Add("salesId", typeof(int));
    dt.Columns.Add("CustomerLocation", typeof(string));
    dt.Columns.Add("CustomerID", typeof(int));
    dt.Columns.Add("CustomerDebt", typeof(int));
    dt.Columns.Add("SubTotal", typeof(int));

    // Values are given in column order:
    // salesId, CustomerLocation, CustomerID, CustomerDebt, SubTotal.
    // Note that CustomerDebt is repeated on every row of the same customer (CustomerID 1).
    dt.Rows.Add(1, "Chichago", 1, 100, 10);
    dt.Rows.Add(2, "Chichago", 1, 100, 40);
    dt.Rows.Add(3, "Chichago", 2, 50, 30);
    dt.Rows.Add(4, "Miami", 3, 20, 50);

    return dt;
}

// Groups the sample sales rows by CustomerLocation and writes one summary
// line per location to the console.
protected void test()
{
    DataTable salesTable = generateData();

    var summaries = salesTable.AsEnumerable()
        .GroupBy(sale => new
        {
            CustomerLocation = sale.Field<string>("CustomerLocation"),
        })
        .Select(locationGroup => new
        {
            CustomerLocation = locationGroup.Key.CustomerLocation,
            SumSubTotal = locationGroup.Sum(sale => sale.Field<int>("SubTotal")),
            OrderCount = locationGroup.Count(),
            // Known problem (the subject of this question): CustomerDebt is repeated
            // on every sales row of a customer, so summing it per row counts the
            // same customer's debt more than once.
            SumCustomerDebt = locationGroup.Sum(sale => sale.Field<int>("CustomerDebt")),
        });

    foreach (var summary in summaries)
    {
        Console.WriteLine(string.Format(
            "CustomerLocation={0},SumSubTotal={1},OrderCount={2},SumCustomerDebt={3}",
            summary.CustomerLocation,
            summary.SumSubTotal,
            summary.OrderCount,
            summary.SumCustomerDebt));
    }
}

Solution

  • Within each location group, group the rows by customer ID and take the debt from the first row of each customer group, so that every customer's debt is counted only once:

    var query = from sale in generateData().AsEnumerable()
                group sale by sale.Field<string>("CustomerLocation") into locationRows
                // One debt value per customer: sub-group the location's rows by
                // CustomerID and read CustomerDebt from the first row of each sub-group,
                // so repeated sales rows of the same customer are not counted again.
                let debtPerCustomer = locationRows
                    .GroupBy(row => row.Field<int>("CustomerID"))
                    .Select(customerRows => customerRows.First().Field<int>("CustomerDebt"))
                select new
                {
                    CustomerLocation = locationRows.Key,
                    SumSubTotal = locationRows.Sum(row => row.Field<int>("SubTotal")),
                    OrderCount = locationRows.Count(),
                    SumCustomerDebt = debtPerCustomer.Sum()
                };
    

    This produces your expected results:

    { CustomerLocation = Chichago, SumSubTotal = 80, OrderCount = 3, SumCustomerDebt = 150 } { CustomerLocation = Miami, SumSubTotal = 50, OrderCount = 1, SumCustomerDebt = 20 }