Search code examples
c++algorithmdata-structurestrie

why this C++ Trie implementation is showing odd behaviour?


I implemented this class to create a trie data structure. The function

unsigned long Insert(string) //inserts the string in trie & return no of words in trie

void PrintAllWords(); // prints all words in trie separated by space in dictionary order

implementation works correctly and prints all the words inserted from a text file of english dictionary words when the number of words is not very large, but when supplied with a file with some 350k words it only prints out a b c d upto z.

private variables

struct TrieTree
{
    std::map<char,struct TrieTree*> map_child;
    std::map<char,unsigned long> map_count; //keeps incrementing count of char in map during insertion.
    bool _isLeaf=false;  // this flag is set true at node where word ends
};

struct TrieTree* _root=NULL;
unsigned long _wordCount=0;
unsigned long _INITIALIZE=1;

Below is complete implementation with driver program. The program is executable.

#include<iostream>
#include<map>
#include<fstream>
class Trie
{
private:

    struct TrieTree
    {
        std::map<char,struct TrieTree*> map_child;
        std::map<char,unsigned long> map_count;
        bool _isLeaf=false;
    };

    struct TrieTree* _root=NULL;
    unsigned long _wordCount=0;
    unsigned long _INITIALIZE=1;

    struct TrieTree* getNode()
    {
        return new TrieTree;
    };


    void printWords(struct TrieTree* Tptr,std::string pre)
    {
        if(Tptr->_isLeaf==true)
        {
            std::cout<<pre<<" ";
            return;
        }

        std::map<char,struct TrieTree*>::iterator it;
        it=Tptr->map_child.begin();
        while(it!=Tptr->map_child.end())
        {
            pre.push_back(it->first);
            printWords(it->second,pre);
            pre.erase(pre.length()-1);  //erase last prefix character
            it++;
        }

    }


public:

    Trie()
    {
        _root=getNode();
    }
    unsigned long WordCount()
    {
        return _wordCount;
    }
    unsigned long WordCount(std::string pre) //count words with prefix pre
    {
        if(WordCount()!=0)
        {
            struct TrieTree *Tptr=_root;
            std::map<char,unsigned long>::iterator it;
            char lastChar;
            for(int i=0;i<pre.length()-1;i++)
            {
                Tptr=Tptr->map_child[pre[i]];
            }
            lastChar=pre[pre.length()-1];
            it=Tptr->map_count.find(lastChar);
            if(it!=Tptr->map_count.end())
            {
                return Tptr->map_count[lastChar];
            }
            else
            {
                return 0;
            }
        }
        return 0;
    }

    unsigned long Insert(std::string key)   //return word count after insertion
    {
        struct TrieTree *Tptr =_root;
        std::map<char,struct TrieTree*>::iterator it;

        if(!SearchWord(key))
        {
            for(int level=0;level<key.length();level++)
            {
                it=Tptr->map_child.find(key[level]);
                if(it==Tptr->map_child.end())
                {
                    //alphabet does not exist in map
                    Tptr->map_child[key[level]]=getNode();  // new node with value pointing to it
                    Tptr->map_count[key[level]] = _INITIALIZE;
                    Tptr=Tptr->map_child[key[level]];       //assign pointer to newly obtained node
                    if(level==key.length()-1)
                        Tptr->_isLeaf=true;
                }
                else
                {   //alphabet exists at this level
                    Tptr->map_count[key[level]]++;
                    Tptr=Tptr->map_child[key[level]];
                }
            }
            _wordCount++;
        }
        return _wordCount;
    }

    bool SearchWord(std::string key)
    {
        struct TrieTree *Tptr =_root;
        std::map<char,struct TrieTree*>::iterator it;
        for(int level=0;level<key.length();level++)
        {
            it=Tptr->map_child.find(key[level]);
         //   cout<<" "<<Tptr->map_child.size()<<endl;  //test to count entries at each map level

            if(it!=Tptr->map_child.end())
            {
                Tptr=Tptr->map_child[key[level]];
            }
            else
            {
                return false;
            }
        }
        if(Tptr->_isLeaf==true)
            return true;
        return false;
    }

    void PrintAllWords()
    {   //print all words in trie in dictionary order
        struct TrieTree *Tptr =_root;
        if(Tptr->map_child.empty())
            {
                std::cout<<"Trie is Empty"<<std::endl;
                return;
            }

        printWords(Tptr,"");

    }
    void PrintAllWords(std::string pre)
    {   //print all words in trie with prefix pre in Dictionary order
        struct TrieTree *Tptr =_root;
        if(Tptr->map_child.empty())
            {
                std::cout<<"Trie is Empty"<<std::endl;
                return;
            }

        for(int i=0;i<pre.length();i++)
        {
            Tptr=Tptr->map_child[pre[i]];
        }

        printWords(Tptr,pre);

    }


};

int main(){
Trie t;

std::string str;
std::fstream fs;
fs.open("words.txt",std::ios::in);

while(fs>>str){
    t.Insert(str);
}

t.PrintAllWords();

return 0;
}

I don't understand the output, please take a look at the code and suggest a fix. Thanks


Solution

  • When you add the word "a", if there is no word starting with 'a' in the tree, you will add a "leaf" node with 'a' as the value. If you then add a word starting with 'a', such as "an", you will add the 'n' node as a child of the 'a' node. However, when you print all the words, you stop recursing when you hit a leaf node, meaning you ignore all the other words starting with that word.

    Simple solution: remove the return from printWords.

    Similarly if you already have "an" in the tree, when you add 'a', you don't mark it as a leaf, so it will never be output.

    Simple solution: Set _isLeaf when adding a word, even if the node already exists (i.e. add Tptr->_isLeaf=true; to the else clause in Insert

    I think you would be better off changing _isLeaf to something like _isWord as it seems odd to have leaf nodes with child items.