I have written a simple Trie implementation. Here is the source code:
#include <string>
#include <map>
typedef unsigned int uint;
class Trie {
public:
class Node {
public:
Node(const char & _value);
~Node();
char get_value() const;
void set_marker(const uint & _marker);
uint get_marker() const;
bool add_child(Node * _child);
Node * get_child(const char & _value) const;
void clear();
private:
char m_value;
uint m_marker;
std::map<char, Node *> m_children;
};
Trie();
~Trie();
bool insert(const std::string & _str);
bool find(const std::string & _str) const;
private:
Node * m_root;
};
// - implementation (in a different file)
using namespace std;
Trie::Node::Node(const char & _value) :
m_value(_value), m_marker(0), m_children() {
}
Trie::Node::~Node() {
clear();
}
void Trie::Node::clear() {
map<char, Node*>::const_iterator it;
for (it = m_children.begin(); it != m_children.end(); ++it) {
delete it->second;
}
}
void Trie::Node::set_marker(const uint & _marker) {
m_marker = _marker;
}
uint Trie::Node::get_marker() const {
return m_marker;
}
char Trie::Node::get_value() const {
return m_value;
}
Trie::Node * Trie::Node::get_child(const char & _value) const {
map<char, Node*>::const_iterator it;
bool found = false;
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (it->first == _value) {
found = true;
break;
}
}
if (found) {
return it->second;
}
return NULL;
}
bool Trie::Node::add_child(Node * _child) {
if (_child == NULL) {
return false;
}
if (get_child(_child->get_value()) != NULL) {
return false;
}
m_children.insert(pair<char, Node *>(_child->get_value(), _child));
return true;
}
Trie::Trie() :
m_root(new Node('\0')) {
}
Trie::~Trie() {
delete m_root;
}
bool Trie::insert(const string & _str) {
Node * current = m_root;
bool inserted = false;
for (uint i = 0; i < _str.size(); ++i) {
Node * child = current->get_child(_str[i]);
if (child == NULL) {
child = new Node(_str[i]);
current->add_child(child);
inserted = true;
}
current = child;
}
if (current->get_marker() != _str.size()) {
current->set_marker(_str.size());
inserted = true;
}
return inserted;
}
bool Trie::find(const std::string & _str) const {
Node * current = m_root;
bool found = false;
for (uint i = 0; i < _str.size(); ++i) {
Node * child = current->get_child(_str[i]);
if (child == NULL) {
break;
} else {
current = child;
}
}
if (current->get_marker() == _str.size()) {
found = true;
}
return found;
}
Here is my test program:
#include <iostream>
#include <sstream>
#include "Trie.h"
int main() {
Trie t;
for (unsigned int i = 0; i < 10000; ++i) {
t.insert("hello");
}
return 0;
}
My problem is that even though 'hello' is already inserted the second time its insertion is attempted, and thus new
is not called anymore, a lot of memory is being allocated and de-allocated. This amount increases as I increases the value of max i. For example, in above case valgrind gives this output:
==10322== HEAP SUMMARY:
==10322== in use at exit: 0 bytes in 0 blocks
==10322== total heap usage: 10,011 allocs, 10,011 frees, 300,576 bytes allocated
I have confirmed that the number of times Node() constructor is called is constant. Then why and how is all that memory being allocated and deallocated?
Every single time you call insert
, you pass it a const char[6]
, but it expects a const std::string&
, and so each and every iteration creates a temporary std::string
, which is then passed to the function, and then destroyed before the next iteration. That clarifies 10000 of the allocations and deallocations, leaving only 11, which are presumably your allocation of the node, as well as whatever std::map
does internally, and a few other places I overlooked (such as copies of strings or the map)
A container could allocate memory even if it contained no elements, but I'd argue that it should have been designed otherwise, and would be surprised if any major implementation of a container did such a thing. (Though deque may be an exception)