I am trying to understand how the "Parallel.ForEach" function works. I have read the MSDN about it and as an overall understanding it seems that, the more computations you put in the loop the faster it goes in comparison to a normal foreach loop.
Current computer has 4 Cores. (I will have installed my 24 core computer in two days.)
However I think I need some expertise insight in this. I have a sorting algorithm that do a sort in about 10 seconds on 1 core.
I have put a complete code where I do it on one core and where I do it in a: Parallel.ForEach loop with 4 cores.
I simply wonder if it is possible to speed up this sort using more cores in any possible way?
Running testsortFunction() produces below result:
void testsortFunction()
{
String resultstring1 = ""; String resultstring2 = "";
resultstring1 = sortingtestBENCHMARKS(false);
resultstring2 = sortingtestBENCHMARKS(true);
MessageBox.Show(resultstring1 + "\n\n" + resultstring2);
}
String sortingtestBENCHMARKS(bool useParallel)
{
List<String> sortedLIST = new List<String>();
List<String> minusLIST = new List<String>();
List<String> plusLIST = new List<String>();
var stopwatch = new Stopwatch();
//Add 3 million elements
for (double i = 0; i < 1000000; i += 1)
{
plusLIST.Add(i + ",awb/aje" + " - " + "ddfas/asa" + " - " + "asoo/qwa");
}
for (double i = 1; i < 2000000; i += 1)
{
minusLIST.Add("-" + i + ",awb/aje" + " - " + "ddfas/asa" + " - " + "asoo/qwa");
}
//Do the sorting!
if (useParallel == false)
{
stopwatch.Start();
sortedLIST = sortLIST(minusLIST, plusLIST); //11 seconds
stopwatch.Stop();
}
else
{
stopwatch.Start();
Parallel.ForEach("dummy", (c) =>
{
sortedLIST = sortLIST(minusLIST, plusLIST); //32 seconds
});
stopwatch.Stop();
}
return "Elapsed Times in seconds(Using Parallel: " + useParallel + "):\n\n" + stopwatch.Elapsed.TotalSeconds; //10.57 seconds
}
List<String> sortLIST(List<String> minusLIST, List<String> plusLIST)
{
plusLIST = plusLIST.OrderBy(i => double.Parse(i.Split(',')[0])).ToList();plusLIST.Reverse();
minusLIST = minusLIST.OrderBy(i => double.Parse(i.Split(',')[0].TrimStart('-'))).ToList();
plusLIST.AddRange(minusLIST);
return plusLIST;
}
I ran some benchmarks, and managed to speed it up by a factor of 2...
public List<String> ParallelSort()
{
var result = new List<string>(plusLIST.Count + minusLIST.Count);
var t1 = Task.Run(() => plusLIST.AsParallel().OrderByDescending(i => int.Parse(i.Split(',')[0])));
var t2 = Task.Run(() => minusLIST.AsParallel().OrderBy(i => int.Parse(i.Split(',')[0].TrimStart('-'))));
Task.WaitAll(t1, t2);
result.AddRange(t1.Result);
result.AddRange(t2.Result);
return result;
}
Here's the benchmark, using BenchmarkDotNet.
(I reduced the list size by a factor of 10, because benchmarking takes a long time and I want to go home tonight).
| Method | Mean | Error | StdDev | Median | Gen 0/1k Op | Gen 1/1k Op | Gen 2/1k Op | Allocated Memory/Op |
|----------------------- |----------:|---------:|----------:|----------:|------------:|------------:|------------:|--------------------:|
| OPSort | 142.35 ms | 5.150 ms | 14.693 ms | 137.40 ms | 29000.0000 | 1000.0000 | - | 135.99 MB |
| OPSortByDescendingLazy | 118.19 ms | 2.672 ms | 7.752 ms | 117.01 ms | 29000.0000 | 1000.0000 | - | 127.32 MB |
| SlightlyParallelSort | 122.62 ms | 3.063 ms | 8.538 ms | 120.45 ms | 29000.0000 | 1000.0000 | - | 127.32 MB |
| ParallelSort | 71.37 ms | 2.190 ms | 6.389 ms | 70.30 ms | 28000.0000 | 1000.0000 | - | 148.63 MB |
| RegexSort | 250.80 ms | 4.887 ms | 7.315 ms | 251.70 ms | 32000.0000 | 1000.0000 | - | 145.99 MB |
And the code:
[MemoryDiagnoser]
public class Tests
{
private List<String> minusLIST = new List<string>(100000);
private List<String> plusLIST = new List<string>(200000);
[IterationSetup]
public void IterationSetup()
{
plusLIST.Clear();
minusLIST.Clear();
//Add 3 million elements
for (double i = 0; i < 100000; i += 1)
{
plusLIST.Add(i + ",awb/aje" + " - " + "ddfas/asa" + " - " + "asoo/qwa");
}
for (double i = 1; i < 200000; i += 1)
{
minusLIST.Add("-" + i + ",awb/aje" + " - " + "ddfas/asa" + " - " + "asoo/qwa");
}
}
[Benchmark]
public List<String> OPSort()
{
plusLIST = plusLIST.OrderBy(i => double.Parse(i.Split(',')[0])).ToList(); plusLIST.Reverse();
minusLIST = minusLIST.OrderBy(i => double.Parse(i.Split(',')[0].TrimStart('-'))).ToList();
plusLIST.AddRange(minusLIST);
return plusLIST;
}
[Benchmark]
public List<String> OPSortByDescendingLazy()
{
var result = new List<string>(plusLIST.Count + minusLIST.Count);
result.AddRange(plusLIST.OrderByDescending(i => int.Parse(i.Split(',')[0])));
result.AddRange(minusLIST.OrderBy(i => int.Parse(i.Split(',')[0].TrimStart('-'))));
return result;
}
[Benchmark]
public List<String> SlightlyParallelSort()
{
var result = new List<string>(plusLIST.Count + minusLIST.Count);
var t1 = Task.Run(() => plusLIST.OrderByDescending(i => int.Parse(i.Split(',')[0])));
var t2 = Task.Run(() => minusLIST.OrderBy(i => int.Parse(i.Split(',')[0].TrimStart('-'))));
Task.WaitAll(t1, t2);
result.AddRange(t1.Result);
result.AddRange(t2.Result);
return result;
}
[Benchmark]
public List<String> ParallelSort()
{
var result = new List<string>(plusLIST.Count + minusLIST.Count);
var t1 = Task.Run(() => plusLIST.AsParallel().OrderByDescending(i => int.Parse(i.Split(',')[0])));
var t2 = Task.Run(() => minusLIST.AsParallel().OrderBy(i => int.Parse(i.Split(',')[0].TrimStart('-'))));
Task.WaitAll(t1, t2);
result.AddRange(t1.Result);
result.AddRange(t2.Result);
return result;
}
private static readonly Regex splitRegex = new Regex(@"^(\d+)");
private static readonly Regex splitWithMinusRegex = new Regex(@"^-(\d+)");
// To test the suggestion from @PanagiotisKanavos
[Benchmark]
public List<String> RegexSort()
{
plusLIST = plusLIST.OrderBy(i => double.Parse(splitRegex.Match(i).Groups[1].Value)).ToList(); plusLIST.Reverse();
minusLIST = minusLIST.OrderBy(i => double.Parse(splitWithMinusRegex.Match(i).Groups[1].Value)).ToList();
plusLIST.AddRange(minusLIST);
return plusLIST;
}
}
class Program
{
static void Main(string[] args)
{
var summary = BenchmarkRunner.Run<Tests>();
Console.WriteLine("========= DONE! ============");
Console.ReadLine();
}
}
It's probably possible to speed it up more: the next step is to break out a profiler.
Note that your arrays are large enough to be allocated on the Large Object Heap, which is generally bad news. You want to avoid as many of these sorts of allocations as you can, so don't let your lists figure out what size they need to be by themselves.