I've written such producer/consumer code, which should generate big file filled with random data
class Program
{
static void Main(string[] args)
{
Random random = new Random();
String filename = @"d:\test_out";
long numlines = 1000000;
var buffer = new BlockingCollection<string[]>(10); //limit to not get OOM.
int arrSize = 100; //size of each string chunk in buffer;
String[] block = new string[arrSize];
Task producer = Task.Factory.StartNew(() =>
{
long blockNum = 0;
long lineStopped = 0;
for (long i = 0; i < numlines; i++)
{
if (blockNum == arrSize)
{
buffer.Add(block);
blockNum = 0;
lineStopped = i;
}
block[blockNum] = random.Next().ToString();
//null is sign to stop if last block is not fully filled
if (blockNum < arrSize - 1)
{
block[blockNum + 1] = null;
}
blockNum++;
};
if (lineStopped < numlines)
{
buffer.Add(block);
}
buffer.CompleteAdding();
}, TaskCreationOptions.LongRunning);
Task consumer = Task.Factory.StartNew(() =>
{
using (var outputFile = new StreamWriter(filename))
{
foreach (string[] chunk in buffer.GetConsumingEnumerable())
{
foreach (string value in chunk)
{
if (value == null) break;
outputFile.WriteLine(value);
}
}
}
}, TaskCreationOptions.LongRunning);
Task.WaitAll(producer, consumer);
}
}
And it does what is intended to do. But for some unknown reason it produces only ~550000 strings, not 1000000 and I can not understand why this is happening.
Can someone point on my mistake? I really don't get what's wrong with this code.
The buffer
String[] block = new string[arrSize];
is declared outside the Lambda. That means it is captured and re-used.
That would normally go unnoticed (you would just write out the wrong random data) but because your if (blockNum < arrSize - 1)
is placed inside the for loop you regularly write a null
into the shared buffer.
Exercise, instead of:
block[blockNum] = random.Next().ToString();
use
block[blockNum] = i.ToString();
and predict and verify the results.