Search code examples
c++arraysdynamichashhashtable

C++ dynamic hash-table


I am learning hashing in c++ right now. I found one example with class with tests. So i want all test to pass.

A dynamic Hash tablet should be programmed. Hash values should be stored in Array which can change size in purpose. When changing the size of the Array, the Hash function should be changed on a way that the target area of the Hash function to be consistent with the size of the Array. When the size of the array is changed all elements need to be hashed again. The key of the element is String and the value of the Hash need to be calculated (the sum of the ASCII values of all chars of the string) mod (size of the Array). If there is collision an Open Addressing:h(key)+j mod M should be made.

For the removing of the elements I need to take care of the ideal position of each element i=h(k) and the current position j I need to care about: T[i], T[i+1],...,T[j] are already taken.

Until now i am stucked at test5. But every time i try to resize it, i get undefined behavior or it crashes.

This is the hashtable.h class:

#ifndef HASHTABLE_H
#define HASHTABLE_H  
#include <string>

using namespace std;

class hashtable
{
    public:
    hashtable();
    virtual ~hashtable();
    void insert(string);
    int find(string);
    void remove(string);
    void resize_array();
    int hashfunction(string str);
    int getSize();
    string* getArray();

private:
     int elemts_in_array;
     int table_size;
     string* T;
};

This is the hashtable.cpp

#include "hashtable.h"
#include<iostream>
#include<cstring>
using namespace std;

hashtable::hashtable()
{
    table_size=10;
    T=new string[table_size];
// your code (start with a capacity of 10)
}

hashtable::~hashtable()
{
}

int hashtable::hashfunction(string str)
{
    int k;
    for(int i=0;i<table_size;i++)
    k+=(int)str[i];
    return k%table_size;// your code
} 

void hashtable::insert(string key)
{
    int k=hashfunction(key);
//    
//    float filled = (float)sizeof(T) / (float)table_size;
//   
//    cout<<filled<< "percent filled"<<endl;
//
//    if(filled >= 0.8)
//  {
//      cout << "Resizing Array.." << endl;
//        resize_array();
//  }

if(T[k]==""){
    T[k]=key;
    }
else {
    while(T[k]!="")
    if(T[k]==key){
     break;
    }else {
    T[k]=key;
    }
    k++;

}
// your code (keys already present in the hashtable should not be added a second time)
//           (use resize_array when the hashtable is filled by 80%)
//           (the testbench expects the resize taking place before and without considering the 
     insertion of the new element)
}

int hashtable::find(string key)
{
    for(int k=0;k<table_size;k++){
    if(T[k]==key)
    return k ;
}
return -1;
// your code (should return -1 if key was not found)
}

void hashtable::remove(string key)
{
    int k=hashfunction(key);
    T[k]="";
    for(int i=1;i<table_size;i++){
    string next=T[k+i];
    if(hashfunction(next)==k){
    T[k]=T[k+1];
    }
}
// your code (Invariant: ensure all keys are at their "optimal" position afterwards)
}

void hashtable::resize_array()
{
// remember the old table
int old_table_size = table_size;
std::string *old_array = T;

// build the new table and reset counter
table_size *= 2;
T = new std::string[table_size];
elemts_in_array = 0;

// now bring the elements over
for (int i=0; i<old_table_size; ++i)
{
    if (old_array[i].empty())
        insert(old_array[i]);
}

// finally, throw out old array
delete[] old_array;
}

//void hashtable::resize_array()
//{
//    size_t newsize=table_size*2;
//    string *newT=new string[newsize];
//    memcpy(newT,T,table_size*sizeof(string));
//
//    table_size=newsize;
//    delete[] T;
//    T=newT;
//  // your code (double the array size)
//}

int hashtable::getSize()
{
    return table_size;
    // your code
}

string* hashtable::getArray()
{
   return T;
} 

And this is the test file:

#include <iostream>
#include "hashtable.h"

bool compare_arrays(string* testarray, hashtable t, int len)
{
    string* array2 = t.getArray();

    if(t.getSize() != len)
{
    return 1;
}

for (int i=0; i < len ; i++)
{
    if(testarray[i] != array2[i])
    {
        return 1;
    }
}
return 0;
}


void print_array(hashtable t){
     string* array = t.getArray();
     for (int i=0; i< t.getSize();i++)
 {
     cout << i << " - " << array[i]<< endl;
 }
}

void test(hashtable* t)
{
    string test_vector1[10] = {"123","","","","","ABCDE","Info","","",""};
    string test_vector2[10] = {"","","","!","","","Test","","ABC",""};
    string test_vector3[10] = {"123", "T!est","Te!st","Tes!t","Test!","ABCDE","Info","","","!Test"};
    string test_vector4[20] = {"","","","","","","","","","T!est","123","Te!st","Tes!t","Test!","!Test","ABCDE","Info","Inof","",""};
    string test_vector5[20] = {"","","","", "", "", "", "", "", "T!est","123","Tes!t","Test!","!Test","Te!st","ABCDE","Info", "Inof", "", ""};

t->insert("Info");
t->insert("123");
t->insert("ABCDE");

if(compare_arrays(test_vector1, *t,10))
{
    cout << "Hashtable does not work correctly!" << endl;
    return;
}

t->insert("Info");
if(compare_arrays(test_vector1, *t,10))
{
    cout << "Inserting a element twice might not work correctly!" << endl;
    return;
}

int test_var = t->find("CBA");
if(test_var != -1)
{
    cout << "Find might not work correctly for unseen keys." << endl;
    return;

}
test_var = t->find("Info");
if(test_var != 6)
{
    cout << "Find might not work correctly!" << endl;
    return;
}


t->insert("!Test");
t->insert("T!est");
t->insert("Te!st");
t->insert("Tes!t");
t->insert("Test!");
t->insert("Test!");

if(compare_arrays(test_vector3, *t,10))
{
    cout << "Hashfunction / insert might not work correctly!" << endl;
    return;
}

t->insert("Inof");
if(compare_arrays(test_vector4, *t,20))
{
    cout << "Resize Array might not work correctly!" << endl;
    return;
}

t->remove("!");
t->remove("Te!st");
t-> insert("Te!st");
if(compare_arrays(test_vector5, *t,20))
t-> insert("Te!st");
if(compare_arrays(test_vector5, *t,20))
{
    cout << "Remove might not work correctly!" << endl;
    return;
}

cout << "All test passed!";

}

int main()
{
    hashtable t;
    test(&t);
}

Solution

  • Two glaring errors:

    int hashtable::hashfunction(string str)
    {
        int k;  // <-- Uninitialized
        for (int i = 0; i < table_size; i++)
            k += (int)str[i];  // <-- Potential out-of-bounds access
        return k % table_size;// your code
    }
    

    The k is uninitialized, thus you do not know what value you will end up with for k.

    The second issue is that if str has a size less than table_size, you are accessing str out-of-bounds. Your example shows that the string "Info" is being passed to hash_function, but the table_size is 10.


    The other fundamental error is that hashtable does not have correct copy-semantics, thus passing or returning hashtable by value will cause undefined behavior. This is all due to you using

    string *T

    as a member variable.

    The easiest fix for this is to not use string* T as a member and instead use std::vector<std::string> T;