Search code examples
c++ data-structures directory iteration boost-filesystem

How to group data while iterating through a directory in c++


I have a directory with 15 folders, and each folder contains 100 text files. Each text file contains a single column of numbers.

I need those numbers to do some calculations, but I cannot figure out how to collect them. I was thinking about a 2D vector, but I need a data structure that holds different types (a string for the name of the folder and integers for the numbers).

What is my best solution?

What I have so far is code that searches all the files under a given path.

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <sstream>
#include <algorithm>
#include <tuple>
#include <boost/filesystem.hpp>
#include<dirent.h>

using namespace std;

namespace fs = boost::filesyst

// prototype to search all the files by given it a path
vector<double> getFilesFromDirectory(const fs::path& startDirectory);

int main()
{   // the directory
    string dir =  "/home/...";

    // testing to call my methode
    vector<double> myDataStructure = getFilesFromDirectory(dir);

    // print out the value of myDataStructure
    for (auto it = myDataStructure.begin(); it != myDataStructure.end(); it++)
    {
        cout << *it << " " << endl;
    }

    return 0;
}

// methode to search all the files by given it a path
vector<double> getFilesFromDirectory(const fs::path& startDirectory) 
{
    vector<double> di; 

    // First check if the start path exists
    if (!fs::exists(startDirectory) || !fs::is_directory(startDirectory))
    {
        cout << "Given path not a directory or does not exist" << endl;
        exit(1);
    }

    // Create iterators for iterating all entries in the directory
    fs::recursive_directory_iterator it(startDirectory); // Directory iterator at the start of the directory
    fs::recursive_directory_iterator end; // Directory iterator by default at the end

    // Iterate all entries in the directory and sub directories
    while (it != end)
    {
        // Print leading spaces
        for (int i = 0; i < it.level(); i++)
            cout << "";

        // Check if the directory entry is an directory
        // When directory, print directory name.
        // Else print just the file name.
        if (fs::is_directory(it->status()))
        {
            // print out the path file
            cout << it->path() << endl; 
        }
        else
        { 
            cout << it->path().filename() << endl;

            // test
            di = getValueFromFile(it->path().c_str());

            // test, here I want to group the numbers of the file
            // and each name of the folder
            for(int i = 0; i < 15; i++)
            {
                di.push_back(mi(fs::basename(it->path()), it->path().c_str());
            }
        }

        // When a symbolic link, don't iterate it. Can cause infinite loop.
        if (fs::is_symlink(it->status()))
            it.no_push();

        // Next directory entry
        it++;
    }  
    return di;
}

Solution

  • If I understand the problem correctly, I'd write a class (or struct) to hold the contents of each file:

    Each object holds a string containing the path, and a vector containing every value in the column for that file.

    In your main program, keep a vector containing each object you create.

    Definition:

    // Include guard. Names containing a double underscore are reserved for the
    // implementation, so the guard uses a plain identifier.
    #ifndef COLVALS_HPP
    #define COLVALS_HPP

    #include <vector>
    #include <string>

    // Pairs a path name with the list of column values collected for it.
    class ColVals {

    private:
      std::vector<double> _colValues;  // values added through appendValue()
      std::string         _pathName;   // path name passed to the constructor

    public:
      // Constructs an object for the given path name.
      ColVals(const std::string& pathName);

      // No destructor is declared on purpose: the members clean up after
      // themselves, and a user-declared destructor would suppress the implicit
      // move operations, forcing containers of ColVals to copy on growth.

      // Adds one value to the stored list.
      void appendValue(const double colValue);

      // Returns the stored values by value (a copy).
      std::vector<double> getValues();

      // Returns the stored path name by value (a copy).
      std::string getPath();
    };

    #endif // COLVALS_HPP
    

    Implementation:

    #include "colvals.hpp"
    
    using namespace std;
    
    ColVals::ColVals(const string& pathName) {
      _pathName = pathName;
    }
    
    void ColVals::appendValue(const double colValue) {
      _colValues.push_back(colValue);
    }
    
    vector<double> ColVals::getValues() {
      return _colValues;
    }
    
    string ColVals::getPath() {
      return _pathName;
    }