I am practicing implementing a hash map in C++. My goal is ultimately to map words to a pair of integers that correspond to their line and column in a text file. I took the hash map implementation from here and built upon it. The code works fine when I pass words with only one letter. However, when a word has more than one letter, the code still compiles in Visual Studio, but at runtime it throws a read access violation on this line:
HashNode<K, V> *entry = table[hashValue];
(within the insert member function). I thought there might be some subtleties I am not aware of when using strings in a template structure; however, I couldn't figure it out after hours of searching the web. Any ideas on how to fix this are greatly appreciated.
#include <string>
#include <iostream>
#include <tuple>
#include <vector>
using namespace std;
// Number of bucket slots HashMap allocates for its table.
#define TABLE_SIZE 1028
/// One link in a bucket's singly linked chain: a key, its value, and a
/// non-owning pointer to the node that follows it in the same chain.
///
/// @tparam K key type (copy-constructible)
/// @tparam V value type (copy-constructible, copy-assignable)
template <typename K, typename V>
class HashNode {
public:
    /// Builds a detached node (no successor) holding copies of the arguments.
    HashNode(const K &key, const V &value)
        : m_key(key), m_value(value), m_next(nullptr) {}

    /// @return a copy of the stored key.
    K getKey() const { return m_key; }

    /// @return a copy of the stored value.
    V getValue() const { return m_value; }

    /// Replaces the stored value.
    void setValue(V value) { m_value = value; }

    /// @return the following node in the chain, or a null pointer at the end.
    HashNode *getNext() const { return m_next; }

    /// Links this node to `next`; the node does not take ownership of it.
    void setNext(HashNode *next) { m_next = next; }

private:
    // Payload carried by this node.
    K m_key;
    V m_value;
    // Successor within the same bucket's chain (null when this is the tail).
    HashNode *m_next;
};
template <typename K, typename V, typename F = KeyHash<K>>
class HashMap {
public:
HashMap() {
// construct zero initialized hash table of size
table = new HashNode<K, V> * [TABLE_SIZE]();
}
~HashMap() {
// destroy all buckets one by one
for (int i = 0; i < TABLE_SIZE; ++i) {
HashNode<K, V> *entry = table[i];
while (entry != NULL) {
HashNode<K, V> *prev = entry;
entry = entry->getNext();
delete prev;
}
table[i] = NULL;
}
// destroy the hash table
delete[] table;
}
void get(const K &key, vector<V> &value) {
unsigned long hashValue = hashFunc(key);
HashNode<K, V> *entry = table[hashValue];
while (entry != NULL) {
if (entry->getKey() == key) {
value.push_back(entry->getValue());
//return true;
}
entry = entry->getNext();
}
//return false;
}
void insert(const K &key, const V &value) {
unsigned long hashValue = hashFunc(key);
HashNode<K, V> *prev = NULL;
HashNode<K, V> *entry = table[hashValue];
while (entry != NULL && entry->getKey() == key) {
prev = entry;
entry = entry->getNext();
}
if (entry == NULL) {
entry = new HashNode<K, V>(key, value);
if (prev == NULL) {
// insert as first bucket
table[hashValue] = entry;
}
else {
prev->setNext(entry);
}
}
else {
// just update the value
entry->setValue(value);
}
}
void remove(const K &key) {
unsigned long hashValue = hashFunc(key);
HashNode<K, V> *prev = NULL;
HashNode<K, V> *entry = table[hashValue];
while (entry != NULL && entry->getKey() != key) {
prev = entry;
entry = entry->getNext();
}
if (entry == NULL) {
// key not found
return;
}
else {
if (prev == NULL) {
// remove first bucket of the list
table[hashValue] = entry->getNext();
}
else {
prev->setNext(entry->getNext());
}
delete entry;
}
}
private:
// hash table
HashNode<K, V> **table;
F hashFunc;
};
int main()
{
struct MyKeyHash
{
unsigned long operator()(const string & s) const
{
int hash = 7;
for (int i = 0; i < s.length(); i++)
{
hash = hash * 31 + s[i];
}
return hash;
}
};
HashMap<string, tuple<int, int>, MyKeyHash> hmap;
hmap.insert("BB", make_pair(3, 3));
hmap.insert("A", make_pair(1, 2));
hmap.insert("A", make_pair(4, 2));
vector<tuple<int, int>> value;
hmap.get("B", value);
for (auto it : value)
{
cout << get<0>(it) << ", " << get<1>(it) << endl;
}
}
unsigned long hashValue = hashFunc(key);
//...
table[hashValue]
The value of `hashValue` is returned from the function
// Hash functor as written in the question (quoted unchanged).
unsigned long operator()(const string & s) const
{
// Signed accumulator: it can overflow for long strings.
int hash = 7;
for (int i = 0; i < s.length(); i++)
{
hash = hash * 31 + s[i];
}
// Returned as-is, without being reduced to the table size.
return hash;
}
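which can return arbitrarily large values (anything in the range of `int`). But `table` is an array of length `TABLE_SIZE` (1028). If the returned value is not smaller than that, you are accessing the array out of bounds.
The way the function is written, this is more likely to happen for longer input strings: a one-letter word hashes to 7 * 31 + c, which stays below 1028 for any ASCII letter, whereas "BB" already hashes to (7 * 31 + 66) * 31 + 66 = 8839.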
You probably meant
unsigned long hashValue = hashFunc(key)%TABLE_SIZE;
Also note that your hash function overflows if the string is long enough, which is undefined behavior because `hash` is a signed `int`. You should use `unsigned long` instead of `int`: it matches the return type, and unsigned arithmetic wraps around instead of overflowing.