Search code examples
c++ vector struct words

Getting Word Frequency From Vector In c++


I have googled this question and couldn't find an answer that worked with my code, so I wrote the code below to get the frequency of each word. The only issue is that I am getting the wrong number of occurrences for every word apart from one, which I think is a fluke. I am also checking whether a word has already been entered into the vector so that I don't count the same word twice.

// Attempt to count how often each word of textFile occurs.
// NOTE(review): the declarations of fileSize, textFile and wordFrequency are
// not shown; the comments below assume textFile holds std::string elements
// and that wordFrequency has Word and Times members, as the usage suggests.
fileSize = textFile.size();
// Creates fileSize default-constructed entries up front, one per input element.
vector<wordFrequency> words (fileSize);
int index = 0;
// The outer loop visits every index from 0 through fileSize - 1.
for(int i = 0; i <= fileSize - 1; i++)
{
    // The inner bound is "<" rather than "<=", so j never reaches the last
    // index (fileSize - 1), unlike i above.
    for(int j = 0; j < fileSize - 1; j++)
    {
        // find() is a substring search: the first test is true whenever
        // textFile[j] occurs anywhere inside textFile[i], not only when the
        // two words are equal. The second test reads words[i], whereas the
        // assignments below write to words[j].
        if(string::npos != textFile[i].find(textFile[j]) && words[i].Word != textFile[j])
        {
            words[j].Word = textFile[i];
            // Post-increment: Times receives the value index had before the
            // increment, so the first match for a given i stores 0.
            words[j].Times = index++;
        }
    }
    // The running counter starts again from zero for the next i.
    index = 0;
}

Any help would be appreciated.


Solution

  • Try this code instead if you do not want to use a map container.

        // Pairs one word with the number of times it has been counted.
        struct wordFreq {
            std::string word;  // text of the word
            int count;         // occurrences recorded for `word`

            // Stores the supplied text and starting count unchanged.
            wordFreq(std::string text, int initial)
                : word(text), count(initial)
            {
            }
        };
    // Collection of wordFreq entries; it starts out empty.
    std::vector<wordFreq> words;
    
    // Reports whether any entry in the range [i, j) has a `word` equal to `s`.
    //
    // i, j: iterators delimiting the entries to scan (j is not examined).
    // s:    the word to look for; taken by const reference so that a lookup
    //       does not copy the string on every call.
    // Returns 1 as soon as a matching entry is found, 0 if none matches.
    int ffind(std::vector<wordFreq>::iterator i, std::vector<wordFreq>::iterator j, const std::string& s)
    {
        for (; i < j; ++i) {
            if (i->word == s)
                return 1;
        }
        return 0;
    }
    

    The code for finding the number of occurrences of each word in a textfile vector is then:

    for(int i=0; i< textfile.size();i++){
        if(ffind(words.begin(),words.end(),textfile[i]))    // Check whether word already checked for, if so move to the next one, i.e. avoid repetitions
            continue;
        words.push_back(wordFreq(textfile[i],1));          // Add the word to vector as it was not checked before and set its count to 1
        for(int j = i+1;j<textfile.size();j++){            // find possible duplicates of textfile[i]
            if(file[j] == (*(words.end()-1)).word)
                (*(words.end()-1)).count++;
        }
    }