I'm writing a program for a challenge on HackerRank, and I need to parse HRML, a markup language similar to HTML:
<tag1 value = "Hello World"></tag1>
As a part of the program I have a function which is supposed to fill a vector of strings with the string tokens. It works fine with the tags, but I also need to tokenize queries, which are in the following format:
tag1.tag2.tag3~attribute_name
The function behaves as if the string iterator stops advancing once it encounters a tilde: everything after the `~` is missing from the output. Here is the code:
#include<cctype>
#include<iostream>
#include<string>
#include<vector>
using namespace std;
// Splits str at every non-alphanumeric character and appends the pieces to
// the given vector, then erases empty strings from that vector.
// NOTE(review): a token is pushed only when a non-alphanumeric character
// follows it, so a token that runs up to the end of str (e.g. "name" in
// "tag1.tag2.tag3~name") is still in current_token when the function returns
// and is never stored.
void tokenize_string(vector<string>& vector, string str)
{
string current_token;
for (auto i = str.begin(); i != str.end(); i++)
{
// Alphanumeric characters extend the token currently being built
if (isalnum(*i))
{
current_token += *i;
}
else
{
// Any other character ends the current token; the token is pushed even
// when it is empty (e.g. for two delimiters in a row)
vector.push_back(current_token);
current_token = "";
}
}
// Remove empty strings that the previous loop placed into the vector.
// After an erase the iterator is reset to begin() and then advanced by the
// loop's i++, so scanning resumes at the second element, not the first.
// NOTE(review): if the erase leaves the vector empty, begin() == end() and
// the following i++ steps past end().
for (auto i = vector.begin(); i != vector.end(); i++)
{
if (*i == "")
{
vector.erase(i);
i = vector.begin();
}
}
}
// Small driver: tokenizes one sample tag and one sample query, then prints
// each token list on its own line with a space after every token.
int main()
{
    std::vector<std::string> tag_tokens;
    tokenize_string(tag_tokens, "<tag1 name=\"Hello\">");

    std::vector<std::string> query_tokens;
    tokenize_string(query_tokens, "tag1.tag2.tag3~name");

    for (const std::string& token : tag_tokens)
    {
        std::cout << token << ' ';
    }
    std::cout << '\n';

    for (const std::string& token : query_tokens)
    {
        std::cout << token << ' ';
    }
    std::cout << '\n';

    // Read one character from standard input before exiting
    // (presumably to keep the console window open).
    std::cin.get();
    return 0;
}
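This happens because the last token is never stored. A token is pushed into the vector only when a non-alphanumeric character is encountered, so when the loop ends (`i == str.end()`) right after `name`, that token is still sitting in `current_token` and is simply discarded. The tilde itself is handled correctly; it is the token after it that gets lost.
Add `vector.push_back(current_token);` after the for loop, as shown below, so that the last token is stored as well.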
/**
 * Splits str into maximal runs of alphanumeric characters and appends each
 * run, in order, to the output vector.
 *
 * Every non-alphanumeric character (for example '<', '.', '~', '"' or ' ')
 * acts as a delimiter. Empty tokens are never stored, so no clean-up pass
 * over the vector is needed, and strings that were already in the vector
 * are left untouched.
 *
 * @param vector  destination; tokens are appended at the back
 * @param str     text to tokenize, such as an HRML tag or a query
 */
void tokenize_string(std::vector<std::string>& vector, std::string str)
{
    std::string current_token;
    for (const char ch : str)
    {
        // isalnum has undefined behaviour for negative values, so the
        // (possibly signed) char is converted to unsigned char first.
        if (std::isalnum(static_cast<unsigned char>(ch)))
        {
            current_token += ch;
        }
        else if (!current_token.empty())
        {
            // A delimiter ends the token that was being built.
            vector.push_back(current_token);
            current_token.clear();
        }
    }
    // The input may end without a trailing delimiter; store the last token.
    if (!current_token.empty())
    {
        vector.push_back(current_token);
    }
}