I have a list of objects that are grouped by one column, as shown below.
I have millions of records, and it is taking more than 30 minutes. How can I write the below code efficiently?
// Builds the voter list: for every member (grouped by last name), fetch the member's
// details and keep the member as a Voter when the returned Age is greater than 18.
List<Voter> voterList = new List<Voter>();
IEnumerable<IGrouping<string, MemberInfo>> groupByLastName = infoList.GroupBy(info => info.LastName);

// GroupBy yields IGrouping<string, MemberInfo> elements, not List<MemberInfo>;
// iterating with a List<MemberInfo> loop variable compiles but throws
// InvalidCastException at run time, so the loop variable uses the grouping type.
foreach (IGrouping<string, MemberInfo> lastName in groupByLastName)
{
    foreach (MemberInfo member in lastName)
    {
        // One GetMemberDetails call is made per member, sequentially.
        MemberInfo info = memberService.GetMemberDetails(member.FirstName);
        if (info.Age > 18)
        {
            voterList.Add(new Voter
            {
                VoterId = member.VoterId,
                Age = member.Age
            });
        }
    }
}
Use Parallel.ForEach.
Add your result objects to a thread-safe collection.
Below is some pseudo code. I can't tell what objects you have at your disposal because your original post uses "var" multiple times.
// Collection that every parallel loop body adds its item to.
BlockingCollection<Voter> threadSafeResults = new BlockingCollection<Voter>();

// Each element of myCollection is handed to the lambda, which adds it to the collection.
Parallel.ForEach(myCollection, item => threadSafeResults.Add(item));
First, I would collect all of your input values (the arguments you pass to your WCF service):
// Collects the input for every lookup up front so the lookups can be parallelized later.
// NOTE(review): despite its name, this collection holds member.FirstName values;
// the name is kept as-is — confirm whether it should be renamed.
ICollection<string> allTheInputLastNames = new List<string>();
IEnumerable<IGrouping<string, MemberInfo>> groupByLastName = infoList.GroupBy(info => info.LastName);

// GroupBy yields IGrouping<string, MemberInfo> elements, not List<MemberInfo>,
// so the loop variable must use the grouping type to avoid an InvalidCastException.
foreach (IGrouping<string, MemberInfo> lastName in groupByLastName)
{
    foreach (MemberInfo member in lastName)
    {
        allTheInputLastNames.Add(member.FirstName);
    }
}
Now, because you have so many, hopefully this runs fairly quickly.
Now that you have collected all the inputs, you want to use the Parallel.ForEach.
I've created a generic example below.
Where I have inputValues, you would have your allTheInputLastNames.
Where I create a new ResultObject, you would make your wcf-service call.
And where I do a "StringLength % 2" check, you would do your info.Age check.
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace MyApp.ParallelStuff
{
    /// <summary>
    /// Generic examples of processing a list of inputs with <c>Parallel.ForEach</c>
    /// and gathering the accepted results in a <c>BlockingCollection</c>.
    /// </summary>
    public class ParallelExampleOne
    {
        /// <summary>
        /// Processes the generated input values in parallel with default options,
        /// keeps every result whose StringLength is even, and writes the number of
        /// kept results to the console.
        /// </summary>
        public void ExampleOne()
        {
            ICollection<string> inputValues = BuildInputValues();

            // BlockingCollection<T> implements IDisposable, so it is released when the work is done.
            using (BlockingCollection<ResultObject> finalItems = new BlockingCollection<ResultObject>())
            {
                Parallel.ForEach(inputValues, (currentInputItem) =>
                {
                    ResultObject ro = new ResultObject(currentInputItem.Length, currentInputItem);
                    if (ro.StringLength % 2 == 0)
                    {
                        finalItems.Add(ro);
                    }
                });

                Console.WriteLine("ExampleOne.finalItems.Count={0}", finalItems.Count);
            }
        }

        /// <summary>
        /// Same work as <see cref="ExampleOne"/>, but the loop is configured through
        /// <c>ParallelOptions</c>: the degree of parallelism is capped at
        /// <c>Environment.ProcessorCount</c> and a cancellation token is attached.
        /// </summary>
        public void ExampleTwo()
        {
            ICollection<string> inputValues = BuildInputValues();

            // Both the token source and the collection are IDisposable.
            using (CancellationTokenSource ct = new CancellationTokenSource())
            using (BlockingCollection<ResultObject> finalItems = new BlockingCollection<ResultObject>())
            {
                ParallelOptions options = new ParallelOptions
                {
                    MaxDegreeOfParallelism = Environment.ProcessorCount,
                    CancellationToken = ct.Token
                };

                Parallel.ForEach(inputValues, options, currentInputValue =>
                {
                    ResultObject ro = new ResultObject(currentInputValue.Length, currentInputValue);
                    if (ro.StringLength % 2 == 0)
                    {
                        finalItems.Add(ro);
                    }
                });

                Console.WriteLine("ExampleTwo.finalItems.Count={0}", finalItems.Count);
            }
        }

        /// <summary>
        /// Builds the sample inputs "MyValue1" through "MyValue9999" used by both examples.
        /// </summary>
        /// <returns>A new list containing the 9999 generated strings.</returns>
        private static ICollection<string> BuildInputValues()
        {
            List<string> inputValues = new List<string>();
            for (int i = 1; i < 10000; i++)
            {
                inputValues.Add("MyValue" + Convert.ToString(i));
            }

            return inputValues;
        }

        /// <summary>
        /// Result of processing one input value: the supplied length and the input
        /// with "MyOutputSuffix" appended.
        /// </summary>
        internal class ResultObject
        {
            internal int StringLength { get; private set; }
            internal string OutputValue { get; private set; }

            /// <summary>Creates a result for a single input value.</summary>
            /// <param name="stringLength">Length stored in <see cref="StringLength"/>.</param>
            /// <param name="inputValue">Input text; stored in <see cref="OutputValue"/> with "MyOutputSuffix" appended.</param>
            public ResultObject(int stringLength, string inputValue)
            {
                this.StringLength = stringLength;
                this.OutputValue = inputValue + "MyOutputSuffix";
            }
        }
    }
}
Also note how you can read my code, because I did not use "var" for my variable declarations.