Search code examples
c# multithreading async-await task directory-walk

Async Await - Need some guidance


I have tried many different ways to get this to work, and I am sure this is not the proper way to wire up async/await for multithreading. Here is what I have so far: a directory walker that I attempted to make async. You won't see any async or await keywords because my attempts with them were unsuccessful, but that is what I am trying to achieve. Right now it runs in a console application; I will abstract and refactor later, once I have a working proof of concept (POC). Any guidance is appreciated.

    /// <summary>
    /// Starts one task per entry in <c>_directoriesToProcess</c>, waits until every task
    /// tracked in <c>_tasks</c> has completed (running tasks may add further tasks to that
    /// collection), then calls <c>OutputFiles</c> and <c>StopAndCleanup</c>.
    /// Any exception is logged and <c>_cts</c> is cancelled.
    /// </summary>
    /// <param name="ct">Token handed to every task created here.</param>
    static void RunProgram(CancellationToken ct)
    {
        try
        {
            foreach (var dir in _directoriesToProcess)
            {
                // Tasks that are already running add to _tasks under _collectionLock,
                // so the initial registrations have to take the same lock.
                lock (_collectionLock)
                {
                    _tasks.Add(CreateNewTask(dir, ct));
                }
            }

            while (true)
            {
                Task[] pending;
                lock (_collectionLock)
                {
                    // Prune finished tasks; copy first so we do not mutate while enumerating.
                    var finished = _tasks.Where(x => x.IsCompleted).ToList();
                    foreach (var task in finished)
                    {
                        _tasks.Remove(task);
                    }

                    // A task only completes after it has registered its children, so an
                    // empty collection means the whole walk is done.
                    if (_tasks.Count == 0)
                    {
                        break;
                    }

                    pending = _tasks.ToArray();
                }

                // Block until something finishes instead of spinning on the lock.
                // WaitAny does not rethrow exceptions from the tasks it waits on.
                Task.WaitAny(pending);
            }

            OutputFiles();
            StopAndCleanup();
        }
        catch (Exception ex)
        {
            Log(LogColor.Red, "Error: " + ex.Message, false);
            _cts.Cancel();
        }
    }


    /// <summary>
    /// Queues a task that runs <c>GetDirectoryFiles</c> for the given directory.
    /// </summary>
    /// <param name="Path">Directory passed through to <c>GetDirectoryFiles</c>.</param>
    /// <param name="ct">Token passed to <c>GetDirectoryFiles</c> and associated with the task.</param>
    /// <returns>The queued task.</returns>
    static Task CreateNewTask(string Path, CancellationToken ct)
    {
        // Task.Run always targets the default scheduler; Task.Factory.StartNew would use
        // TaskScheduler.Current, which depends on where the caller happens to be running.
        return Task.Run(() => GetDirectoryFiles(Path, ct), ct);
    }

    /// <summary>
    /// Counts the files in <paramref name="Path"/> that match the <c>_fileExtension</c> search
    /// pattern, adds the count to <c>_overallFileCount</c>, logs the figures, appends a CSV row
    /// to <c>_csvBuilder</c>, and queues one new task per immediate sub directory.
    /// Does nothing when cancellation has already been requested. Any exception is logged
    /// and <c>_cts</c> is cancelled.
    /// </summary>
    /// <param name="Path">Directory to inspect.</param>
    /// <param name="ct">Token checked on entry and passed to the tasks created for sub directories.</param>
    static void GetDirectoryFiles(string Path, CancellationToken ct)
    {
        if (ct.IsCancellationRequested)
        {
            return;
        }

        try
        {
            // Both calls return arrays, so use Length directly rather than LINQ Count()/ToList().
            int currentFileCount = Directory.GetFiles(Path, _fileExtension).Length;
            string[] subDirs = Directory.GetDirectories(Path);

            lock (_objLock)
            {
                _overallFileCount += currentFileCount;
                Log(LogColor.White, "- Current path: " + Path);
                Log(LogColor.Yellow, "--  Sub directory count: " + subDirs.Length);
                Log(LogColor.Yellow, "--  File extension: " + _fileExtension);
                Log(LogColor.Yellow, "--  Current count: " + currentFileCount);
                Log(LogColor.Red, "--  Running total: " + _overallFileCount);
                _csvBuilder.Add(string.Format("{0},{1},{2},{3}", Path, subDirs.Length, _fileExtension, currentFileCount));
                Console.Clear();
                Log(LogColor.White, "Running file count: " + _overallFileCount, false, true);
            }

            // Register all child tasks under a single lock acquisition instead of
            // re-taking the lock for every sub directory.
            lock (_collectionLock)
            {
                foreach (var dir in subDirs)
                {
                    _tasks.Add(CreateNewTask(dir, ct));
                }
            }
        }
        catch (Exception ex)
        {
            Log(LogColor.Red, "Error: " + ex.Message, false);
            _cts.Cancel();
        }
    }

Solution

  • I don't think there's any issue with what you're trying to do; just be cautious about uncontrolled concurrency, e.g. reading too many directories at once on different threads. Context switching could end up making it slower.

    Instead of doing things as side effects in your methods, try returning the collected values, for example:

    /// <summary>
    /// Sketch: gathers a <c>DirectoryStat</c> for <paramref name="path"/> and, recursively, for
    /// every directory beneath it, returning all of them as one flat sequence.
    /// </summary>
    /// <param name="path">Directory to inspect.</param>
    /// <param name="fileExtension">Passed unchanged to each recursive call.</param>
    /// <param name="ct">Token passed to <c>Task.Run</c> and to each recursive call.</param>
    /// <returns>The stat for this directory followed by the stats of its descendants.</returns>
    static async Task<IEnumerable<DirectoryStat>> GetDirectoryFiles(string path, string fileExtension, CancellationToken ct)
    {
        var thisDirectory = await Task.Run(() => /* Get directory file count and return a DirectoryStat object */, ct);
        var subDirectoryResults = await Task.WhenAll(Directory.GetDirectories(path).Select(dir => GetDirectoryFiles(dir, fileExtension, ct)));

        // WhenAll yields one sequence per sub directory; flatten them before appending.
        return (new[] { thisDirectory }).Concat(subDirectoryResults.SelectMany(stats => stats));
    }
    

    You can then iterate over the results later and pull the data you need from each DirectoryStat (for example, summing the file counts to get the equivalent of _overallFileCount).

    NOTE: Untested :)