Search code examples
c++ · c++11 · ifstream · removing-whitespace

How to read the rest of a sequence that continues on a later line, ignoring whitespace? (C++)


Looking at line 2 (Cow – DNA sequence): this sequence continues on lines 13, 24, and so on. I want to build the complete sequence for each entry, ignoring the whitespace and newlines in between.

This is the format of the file: 1

This is my code, which currently reads only the first 10 sequences:

// Input stream opened on the file named "txt" (relative to the working directory).
ifstream file ("txt");
// Buffer for the line most recently read with getline() in getSequence().
string line;
// Accumulated sequences; getSequence() fills these entries and prints them.
vector <string> vec;
// String stream; not used by the code shown here.
stringstream s;
// Holds the name token that getSequence() reads from the file.
string name;

/**
 * Returns a copy of `s` with leading and trailing whitespace removed.
 *
 * Whitespace here means space, tab, newline, carriage return, form feed and
 * vertical tab, so a trailing '\r' left over from a CRLF-terminated line is
 * trimmed as well. Whitespace inside the text is kept.
 *
 * @param s  Text to trim; taken by const reference so const strings and
 *           temporaries can be passed. The argument is never modified.
 * @return   The trimmed text, or an empty string if `s` is empty or consists
 *           only of whitespace.
 */
std::string strip(const std::string & s)
{
    static const char kWhitespace[] = " \t\n\r\f\v";

    const std::string::size_type first = s.find_first_not_of(kWhitespace);
    if (first == std::string::npos) {
        // Nothing but whitespace (or nothing at all).
        return "";
    }
    // A non-whitespace character exists, so this search cannot fail.
    const std::string::size_type last = s.find_last_not_of(kWhitespace);
    return s.substr(first, last - first + 1);
}

/**
 * Reads an interleaved sequence file from the global stream `file` into the
 * global vector `vec` and prints one complete sequence per line to stdout.
 *
 * Layout assumed from the accompanying description (TODO confirm against the
 * real data file):
 *   - the first line holds two integers, the number of sequences (`row`) and
 *     the sequence length (`col`);
 *   - the next `row` non-blank lines each start with a sequence name followed
 *     by the first chunk of that sequence;
 *   - every following non-blank line is a continuation chunk, cycling through
 *     the sequences in the same order; blank lines separate the blocks.
 *
 * All whitespace inside the data is dropped. The name is stored in the global
 * `name` (last one read wins) and is not part of the printed sequence.
 *
 * Errors (file not open, unreadable or non-positive sequence count) are
 * reported on stderr; whatever `vec` holds is still printed afterwards.
 * Any previous contents of `vec` are replaced on a successful header read.
 */
void getSequence(){
    // Characters treated as whitespace; includes '\r' so CRLF files work.
    const string whitespace = " \t\r\n\f\v";
    int row = 0;
    int col = 0;   // consumed from the header; the declared length is not validated here

    if (!file.is_open())
    {
        cerr << "Error: file did not open!" << endl;
    }
    else if (!(file >> row >> col) || row <= 0)
    {
        // Without a positive count the modulo below would be undefined.
        cerr << "Error: could not read a positive sequence count from the header!" << endl;
    }
    else
    {
        const vector<string>::size_type rows = static_cast<vector<string>::size_type>(row);
        vec.assign(rows, string());

        // `>>` leaves the rest of the header line in the stream; discard it so
        // it is not mistaken for the first data line.
        getline(file, line);

        // Counts non-blank data lines only, so separator lines between blocks
        // cannot shift the line-to-sequence mapping.
        vector<string>::size_type lineIndex = 0;
        while (getline(file, line))
        {
            string::size_type pos = line.find_first_not_of(whitespace);
            if (pos == string::npos)
            {
                continue;   // blank or whitespace-only line
            }

            if (lineIndex < rows)
            {
                // Only the first block carries names: split the leading token
                // off and keep the remainder as sequence data.
                const string::size_type nameEnd = line.find_first_of(whitespace, pos);
                if (nameEnd == string::npos)
                {
                    name = line.substr(pos);
                }
                else
                {
                    name = line.substr(pos, nameEnd - pos);
                }
                pos = nameEnd;   // npos when the line holds a name and nothing else
            }

            string & sequence = vec[lineIndex % rows];
            for (; pos < line.size(); ++pos)
            {
                if (whitespace.find(line[pos]) == string::npos)
                {
                    sequence += line[pos];
                }
            }
            ++lineIndex;
        }
    }

    for (const string & v : vec) {
        cout << v << endl;
    }
}

Thank you in advance for your help.


Solution

  • Perhaps this will help a bit. The idea is to read `row` and `col` first, then read one header line (a name followed by the first chunk of data) for each of the `row` sequences. After that, repeatedly read the next chunk of `row` lines and append each line to the corresponding sequence, assuming the lines are interleaved.

    #include <iostream>
    #include <fstream>
    #include <string>
    #include <vector>
    
    // One named sequence assembled from the input file.
    struct Sequence
    {
        std::string name;   // identifier token read by readHeader()
        std::string data;   // sequence text: set by readHeader(), extended by readChunk()
    };
    // Collection of sequences, one element per sequence in the file.
    using SeqVec = std::vector<Sequence>;
    
    bool readHeader(std::ifstream& f, SeqVec& v)
    {
        for (size_t i = 0; i < v.size(); ++i)
        {
            if (!(f >> v[i].name >> v[i].data))
            {
                return false;
            }
        }
        return true;
    }
    
    int readChunk(std::ifstream& f, SeqVec& v)
    {
        int linesRead = 0;
        std::string chunk;
        for (size_t i = 0; i < v.size(); ++i)
        {
            if(!(f >> chunk))
            {
                break;
            }
            v[i].data += chunk;
            ++linesRead;
        }
        return linesRead;
    }
    
    int main()
    {
        std::vector<Sequence> v;
    
        const std::string filename = "test.txt";
        std::ifstream f(filename);
        if (!f)
        {
            return -1;
        }
    
        int row;
        int col;
        if (f >> row >> col)
        {
            v.resize(row);
            if (!readHeader(f, v))
            {
                return -1;
            }
            for (;;)
            {
                int linesRead = readChunk(f, v);
                if (linesRead == 0 && v[0].data.size() == col)
                {
                    //If we read nothing and the lines are the correct length we're done.
                    break;
                }
                else if (linesRead < v.size())
                {
                    //partial read is an error.
                    return -1;
                }
            }
        }
    
        for (auto& seq : v)
        {
            std::cout << seq.name << " : " << seq.data << "\n";
        }
        return 0;
    }