Search code examples
c++ stdvector

std::vector throws std::out_of_range while reading tokens from a file


I have a problem where I open a file (tokens.tk), extract the tokens from it, and store each one in a Token struct. When I then try to access the resulting vectors, the program throws a std::out_of_range error, and I can't work out why. Here is my code:

#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>

// Makes every std name usable unqualified; the string, vector, ifstream and
// istringstream uses below depend on this directive.
using namespace std;

// Short alias for the 8-bit unsigned type that holds a Token's level code.
typedef uint8_t u8;

// One entry of the token file: the token text plus a numeric level code.
struct Token {
  // Level codes stored in `level`.
  static const u8 UNSPEC{0};
  static const u8 NO_CHILD{1};
  static const u8 ONE_CHILD{2};
  static const u8 TWO_CHILD{3};
  static const u8 THREE_CHILD{4};
  static const u8 FOUR_CHILD{5};
  // Level code of this token.
  u8 level;
  // Token text.
  string message;
  // Stores both arguments by copy-assignment.
  Token(u8 level, string message) {
    this->level = level;
    this->message = message;
  }
  // Builds a Token from one line: the first whitespace-delimited word becomes
  // the message and the next non-whitespace character is used to pick the level.
  static Token parse(string line) {
    string temp;
    istringstream iss(line);
    char lev;
    u8 tpe;
    // The stream state is not checked: if the line holds fewer than two
    // fields, `lev` is read by the switch without ever having been assigned.
    iss >> temp >> lev;
    // NOTE(review): no case ends in `break`, so control falls through every
    // later label; any matched character therefore leaves tpe == FOUR_CHILD.
    switch (lev) {
    case 'c':
      tpe = Token::UNSPEC;
    case '0':
      tpe = Token::NO_CHILD;
    case '1':
      tpe = Token::ONE_CHILD;
    case '2':
      tpe = Token::TWO_CHILD;
    case '3':
      tpe = Token::THREE_CHILD;
    case '4':
      tpe = Token::FOUR_CHILD;
    default:
      // Characters matching no label (including upper-case 'C') assign
      // nothing, so `tpe` is still uninitialized when it is returned.
      break;
    }
    return Token(tpe, temp);
  }
};

// Tokens grouped into one vector per level code; filled by TokenScraper::sort().
struct SortedTokens {
  vector<Token> first, second, third, fourth, none, unspec;
};

// Error object thrown by TokenScraper's constructor.
struct IOError {
  // Text passed to the constructor.
  string message;
  IOError(string message, int line) {
    this->message = message;
    // NOTE(review): this appends to the by-value parameter after the member
    // has already been copied, so the stored message never contains `line`.
    message.append(to_string(line));
  }
};

// Reads a token file line by line and groups the parsed tokens by level code.
class TokenScraper {
  // Input stream opened by the constructor and closed by the destructor.
  ifstream file;
  // Tokens accumulated by parseFile(), in file order.
  vector<Token> tokens;

public:
  // Opens `filename` for reading.
  TokenScraper(string filename);
  // Parses every remaining line of the file into `tokens`.
  void parseFile();
  // Copies `tokens` into the per-level vectors of a SortedTokens.
  struct SortedTokens sort();
  // Closes the file.
  ~TokenScraper();
};

// Opens `filename`; throws a heap-allocated IOError (by pointer) when the
// stream is not open or the peek/eof comparison below holds.
TokenScraper::TokenScraper(string filename) {
  file.open(filename);
  // NOTE(review): file.eof() returns a bool, so this compares the peeked
  // character with 0 or 1 rather than with the end-of-file value.
  if (!file.is_open() || file.peek() == file.eof()) {
    // Thrown via `new`, so a handler has to catch `IOError *`.
    throw new IOError("Cannot open file at line:", __LINE__);
    // Unreachable: nothing after the throw statement executes.
    file.close();
    return;
  }
}

// Reads the file one line at a time and appends the Token parsed from each
// line to `tokens`. Every line is passed to Token::parse(), empty ones included.
void TokenScraper::parseFile() {
  std::string temp;
  while (getline(file, temp)) {
    Token t = Token::parse(temp);
    tokens.push_back(t);
  }
}

// Closes the input file when the scraper is destroyed.
TokenScraper::~TokenScraper(){
  file.close();
}

// Copies each element of `tokens` into the vectors of a SortedTokens chosen
// by the switch on its level, and returns the result by value.
struct SortedTokens TokenScraper::sort() {
  SortedTokens st;
  // NOTE(review): `i` is a signed int compared against the unsigned size().
  for (int i = 0; i < tokens.size(); i++) {
    // NOTE(review): no case ends in `break`, so a token is also pushed into
    // the vector of every case that follows the one it matched.
    switch (tokens.at(i).level) {
    case Token::NO_CHILD:
      st.none.push_back(tokens.at(i));
    case Token::ONE_CHILD:
      st.first.push_back(tokens.at(i));
    case Token::TWO_CHILD:
      st.second.push_back(tokens.at(i));
    case Token::THREE_CHILD:
      st.third.push_back(tokens.at(i));
    case Token::FOUR_CHILD:
      st.fourth.push_back(tokens.at(i));
    case Token::UNSPEC:
      st.unspec.push_back(tokens.at(i));
    default:
      break;
    }
  }
  return st;
}

// Namespace-scope object: it is constructed before main() starts, so an
// exception thrown by its constructor cannot be caught inside main().
TokenScraper ts("tokens.tk");

// Parses the token file, sorts the tokens and prints the message of the first
// entry of the `first` and `second` vectors.
int main(void){
  ts.parseFile();
  struct SortedTokens st = ts.sort();

  // at() is bounds-checked: it throws std::out_of_range when the vector is empty.
  std::cout << st.first.at(0).message << '\n';
  std::cout << st.second.at(0).message << '\n';
  
}

And here is my tokens.tk:

@use 1;
@start 1;
@eval 2;
@end 0;
@include 1;
@def 2;
@alias 2;

pack C;
import 1;
export C;

Each token has two parts:

  1. the token (std::string)
  2. the number of children (unsigned char)

I looked at other answers on StackOverflow and none of them really apply to me.


Solution

  • The root of your problem is that all of your case blocks are missing a break statement. So, no matter which value is detected by the switch, the code will flow into the corresponding case first and then fall-through into all subsequent cases.

    As such, inside of Token::parse(), most¹ of your parsed tokens end up being classified as Token::FOUR_CHILD. TokenScraper::sort() then pushes those tokens into the wrong vectors (st.fourth and, because of its own fall-through, st.unspec as well), which means the st.first and st.second vectors are never populated.

    In main(), you are trying to access the 1st elements of st.first and st.second, which fails the bounds checks that you are performing with at().

    ¹ For the pack C; and export C; tokens, the level member will be uninitialized, because 'C' and 'c' are not the same value, so your case 'c' block never runs for them. You should get rid of the case 'c' block altogether and move the assignment of Token::UNSPEC to the default block instead, so that all unknown tokens get classified as Token::UNSPEC.


    On a side note:

    • in Token::parse(), you are not checking whether iss >> temp >> lev; is successful before using temp and lev later. This is particularly important when handling the empty line between the @alias 2; and pack C; tokens. You will end up with another Token that has an uninitialized level member.

    • why are you using at() so much? Especially inside your for loop of TokenScraper::sort(), that is overkill since the loop is already doing the necessary bounds checking for you. So you can reduce some overhead by using vector::operator[] instead of vector::at() inside the loop.

    • do not use new with throw. You should be throwing the IOError object by value, not by pointer. Although you can throw a new'd pointer, nothing deletes it automatically: an exception handler that catches the pointer has to delete it itself, otherwise the object is leaked.


    Try this instead:

    #include <cstdint>
    #include <exception>
    #include <fstream>
    #include <iostream>
    #include <sstream>
    #include <stdexcept>
    #include <string>
    #include <utility>
    #include <vector>
    
    // Every unqualified standard name in this listing (string, vector,
    // ifstream, ...) relies on this directive.
    using namespace std;

    // Byte-sized unsigned integer used for Token::level.
    using u8 = uint8_t;
    
    /// Exception thrown when the token file cannot be opened or a line of it
    /// cannot be parsed.
    ///
    /// Derives from std::runtime_error so that generic
    /// `catch (const std::exception &)` handlers can report the text via what().
    struct IOError : std::runtime_error {
      /// Full error text: the caller's message followed by the line number.
      /// Kept as a public member for code that reads `e.message` directly.
      std::string message;

      /// @param message Text prefix, e.g. "Cannot open file at line:".
      /// @param line    Number appended to the prefix (callers pass __LINE__).
      IOError(std::string message, int line)
          : std::runtime_error(message + std::to_string(line)),
            message(what()) {}
    };
    
    /// One entry of the token file: the token text plus a numeric level code.
    struct Token {
      // Level codes stored in `level`. parse() maps the characters '0'..'4' to
      // NO_CHILD..FOUR_CHILD and everything else to UNSPEC.
      static const u8 UNSPEC{0};
      static const u8 NO_CHILD{1};
      static const u8 ONE_CHILD{2};
      static const u8 TWO_CHILD{3};
      static const u8 THREE_CHILD{4};
      static const u8 FOUR_CHILD{5};

      u8 level;        ///< One of the level codes above.
      string message;  ///< Token text.

      /// Initializes both members directly; the by-value string is moved into
      /// place instead of being default-constructed and then copy-assigned.
      Token(u8 level, string message)
          : level(level), message(std::move(message)) {}

      /// Parses one line of the form "<word> <level-char>...".
      ///
      /// The first whitespace-delimited word becomes the message and the next
      /// non-whitespace character selects the level code. Anything after that
      /// character (such as a trailing ';') is ignored.
      ///
      /// @param line One line of the token file.
      /// @return The parsed token.
      /// @throws IOError if the line does not contain both fields.
      static Token parse(const string &line) {
        istringstream iss(line);
        string temp;
        char lev = '\0';

        if (!(iss >> temp >> lev)) {
          // __LINE__ only identifies this throw site, so the offending input is
          // quoted to show which line of the file failed.
          throw IOError("Cannot parse input \"" + line + "\" at line:", __LINE__);
        }

        // Starts as UNSPEC so every unrecognized character gets a defined level.
        u8 tpe = Token::UNSPEC;
        switch (lev) {
        case '0':
          tpe = Token::NO_CHILD;
          break;
        case '1':
          tpe = Token::ONE_CHILD;
          break;
        case '2':
          tpe = Token::TWO_CHILD;
          break;
        case '3':
          tpe = Token::THREE_CHILD;
          break;
        case '4':
          tpe = Token::FOUR_CHILD;
          break;
        default:
          break;
        }

        return Token(tpe, std::move(temp));
      }
    };
    
    /// Result of TokenScraper::sort(): one vector of tokens per level code.
    struct SortedTokens {
      vector<Token> first;   ///< Tokens with level ONE_CHILD.
      vector<Token> second;  ///< Tokens with level TWO_CHILD.
      vector<Token> third;   ///< Tokens with level THREE_CHILD.
      vector<Token> fourth;  ///< Tokens with level FOUR_CHILD.
      vector<Token> none;    ///< Tokens with level NO_CHILD.
      vector<Token> unspec;  ///< Tokens with any other level.
    };
    
    /// Reads a token file line by line and groups the parsed tokens by level.
    class TokenScraper {
    public:
      /// Opens `filename` for reading; throws IOError on failure.
      TokenScraper(const string &filename);
      ~TokenScraper();

      /// Parses the remaining lines of the file into the internal token list.
      void parseFile();

      /// Returns the parsed tokens distributed into per-level vectors.
      SortedTokens sort();

    private:
      ifstream file;         ///< Input stream opened by the constructor.
      vector<Token> tokens;  ///< Tokens collected by parseFile(), in file order.
    };
    
    /// Opens the token file and verifies that it has at least one character
    /// to read.
    ///
    /// @param filename Path of the token file.
    /// @throws IOError if the file cannot be opened or is empty.
    TokenScraper::TokenScraper(const string &filename) : file(filename) {
      // peek() yields traits_type::eof() when nothing can be read. It must not
      // be compared with file.eof(): that is a bool stream-state flag (0 or 1),
      // not the end-of-file character value, so an empty file would slip through.
      if (!file.is_open() || file.peek() == ifstream::traits_type::eof()) {
        throw IOError("Cannot open file at line:", __LINE__);
      }
    }
    
    void TokenScraper::parseFile() {
      std::string temp;
      while (getline(file, temp)) {
        if (!temp.empty()) {
          Token t = Token::parse(temp);
          tokens.push_back(t);
        }
      }
    }
    
    // No explicit close() is needed: the ifstream member's own destructor
    // closes the file when the scraper is destroyed.
    TokenScraper::~TokenScraper() = default;
    
    /// Copies every parsed token into the SortedTokens vector that matches its
    /// level code; tokens with an unrecognized level go to `unspec`.
    ///
    /// @return The populated buckets; file order is preserved within each one.
    SortedTokens TokenScraper::sort() {
      SortedTokens buckets;
      for (const Token &tok : tokens) {
        // Pick the destination first, then do a single push_back below.
        vector<Token> *dest = &buckets.unspec;
        switch (tok.level) {
        case Token::NO_CHILD:
          dest = &buckets.none;
          break;
        case Token::ONE_CHILD:
          dest = &buckets.first;
          break;
        case Token::TWO_CHILD:
          dest = &buckets.second;
          break;
        case Token::THREE_CHILD:
          dest = &buckets.third;
          break;
        case Token::FOUR_CHILD:
          dest = &buckets.fourth;
          break;
        default:
          break;
        }
        dest->push_back(tok);
      }
      return buckets;
    }
    
    int main() {
      TokenScraper ts("tokens.tk");
      ts.parseFile();
      SortedTokens st = ts.sort();
    
      std::cout << st.first.at(0).message << '\n';
      std::cout << st.second.at(0).message << '\n';
    }