The problem I'm having with this code is in its last block, the get_words_beginning_s function.
/*
Name: xx
Date: xx
Purpose:Read text from file, count number of words, unique words, word frequency, & number of words that begin with letter 's'
*/
#include <cctype>
#include <fstream>
#include <iostream>
#include <set>
#include <string>
using namespace std;
multiset<string> display_and_load_words(string filename);
set<string> get_unique_words(multiset<string>& words);
set<string> get_words_beginning_s(multiset<string>& words);
// Entry point of the Word Counter program.
// Loads the words of "dickens.txt" (echoing the file text while loading), then
// prints the total word count, the unique words, the number of occurrences of
// each unique word, and the unique words that begin with 's'.
int main() {
    cout << "The Word Counter program\n\n";

    const string filename = "dickens.txt";
    cout << "FILE TEXT: ";
    auto words = display_and_load_words(filename);
    cout << "WORD COUNT: " << words.size() << "\n\n";

    auto unique_words = get_unique_words(words);
    auto words_beginning_s = get_words_beginning_s(words);

    cout << unique_words.size() << " UNIQUE WORDS: ";
    // Iterate by const reference so each word is not copied.
    for (const string& word : unique_words) {
        cout << word << ' ';
    }
    cout << "\n\n";

    cout << "COUNT PER WORD: ";
    for (const string& word : unique_words) {
        // multiset::count gives the number of occurrences of this word.
        cout << word << '=' << words.count(word) << ' ';
    }
    cout << "\n\n";

    cout << "WORDS THAT BEGIN WITH 'S': ";
    for (const string& word : words_beginning_s) {
        cout << word << ' ';
    }
    cout << "\n\n";
}
// Reads whitespace-separated tokens from the file `filename`, echoes each raw
// token to standard output followed by a space, and returns the normalized
// words as a multiset.
//
// Normalization: '.' and ',' are removed and every character is converted to
// lowercase. Tokens that are empty after normalization (e.g. a lone ",") are
// not words and are skipped, so they do not inflate the word count.
//
// If the file cannot be opened, a message is written to standard error and an
// empty multiset is returned.
multiset<string> display_and_load_words(string filename) {
    multiset<string> words;

    ifstream infile(filename);
    if (!infile) {
        cerr << "Could not open file: " << filename << '\n';
        return words;
    }

    string word;
    while (infile >> word) {
        cout << word << ' ';

        string new_word;
        for (char c : word) {
            if (c == '.' || c == ',') {
                continue; // remove punctuation
            }
            // The <cctype> functions require a value representable as
            // unsigned char; passing a negative char is undefined behavior.
            // tolower leaves characters that are not uppercase unchanged.
            new_word += static_cast<char>(tolower(static_cast<unsigned char>(c)));
        }

        if (!new_word.empty()) {
            words.insert(new_word); // add word
        }
    }
    cout << "\n\n";

    // infile is closed automatically when it goes out of scope.
    return words;
}
// Returns the distinct words contained in `words`.
// `words` is only read. A std::set stores each key at most once, so building
// the set from the whole range discards the duplicates without a separate
// lookup per word.
set<string> get_unique_words(multiset<string>& words) {
    return set<string>(words.begin(), words.end());
}
// Returns the distinct words in `words` whose first character is a lowercase
// 's'. `words` is only read.
//
// A set/multiset has no operator[]; the character test has to be applied to
// each element (a std::string), not to the container itself.
set<string> get_words_beginning_s(multiset<string>& words) {
    set<string> words_beginning_s;
    for (const string& word : words) {
        // Guard against empty strings before looking at the first character.
        if (!word.empty() && word.front() == 's') {
            // set::insert ignores words that are already present.
            words_beginning_s.insert(word);
        }
    }
    return words_beginning_s;
}
When working with a set/multiset, how does one compare the character at a given position within each individual word, rather than comparing entire words? Example string in the text file: "John goes to the store". With a plain string, a simple for loop can test the character at the initial position and count the number of times it matches, something like:
// Sketch from the question: increments s_word when the character at index 0 is 's'.
// NOTE(review): the condition reads index 0 on every pass and never uses i, and
// the closing brace of the for loop is not part of this excerpt.
// NOTE(review): length() and operator[] presumably refer to a std::string here;
// std::set provides neither — confirm the intended type.
for (int i = 0; i < words_beginning_s.length(); ++i) {
if (words_beginning_s[0] == 's') {
++s_word;
}
This does not work when using set/multiset. Pretty new to this, so sorry if this question seems dumb.
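You can use the multiset's member function lower_bound to get iterators to a range and then create a set from that range.
Because a multiset keeps its elements sorted, every word that begins with 's' lies between lower_bound("s") and lower_bound("t").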
Example:
#include <iostream>
#include <set>
#include <string>
// Collects the distinct words of `words` that start with 's'.
// The multiset is ordered, so those words form the contiguous range that
// begins at the first element not less than "s" and ends just before the
// first element not less than "t".
std::set<std::string> get_words_beginning_s(const std::multiset<std::string>& words) {
    const auto first_s = words.lower_bound("s");
    const auto first_t = words.lower_bound("t");
    std::set<std::string> s_words(first_s, first_t);
    return s_words;
}
// Demo: builds a small multiset of words and prints, one per line, those
// returned by get_words_beginning_s.
int main() {
    std::multiset<std::string> words{
        "foo", "slayer", "bar", "sepultura", "tesseract", "skinny puppy", "yello"
    };

    const std::set<std::string> s_words = get_words_beginning_s(words);
    for (auto it = s_words.begin(); it != s_words.end(); ++it) {
        std::cout << *it << '\n';
    }
}
Output:
sepultura
skinny puppy
slayer