this is a homework question. I am working on my Hash Table
assignment and I got stuck on the re-hash
function. When I run the code I pasted below this error gets thrown:
terminate called after throwing an instance of 'std::logic_error'
what(): basic_string::_M_construct null not valid
Aborted (core dumped)
And after reading this I'm still very confused on what's happening.
My best effort of producing a minimal reproducible example:
// {
// @note I really tried to shorten it down as much as possible, but I still need certain functions and class declarations in order for the code to run
// }
#include <iostream>
#include <string>
#include <vector>
#include <cmath>
#include <iomanip>
#include <atomic>
class DataEntry {
private:
std::string date;
std::string country;
int c_cases;
int c_deaths;
public:
DataEntry() {
this->date = "\0";
this->country = "\0";
this->c_cases = 0;
this->c_deaths = 0;
}
inline void set_date(std::string set_date) { this->date = set_date;};
inline void set_country(std::string set_country) { this->country = set_country;};
inline void set_c_deaths(int set_c_deaths) { this->c_deaths = set_c_deaths;};
inline void set_c_cases(int set_c_cases) { this->c_cases = set_c_cases;};
inline int get_c_cases() { return c_cases;};
inline int get_c_deaths() { return c_deaths;};
inline std::string get_date() { return date;};
inline std::string get_country() { return country;};
};
class CovidDB {
private:
int size = 17;
std::vector<std::vector<DataEntry*>> HashTable;
public:
inline CovidDB() {
HashTable.resize(size);
}
/** @note Copy constructor */
CovidDB(const CovidDB& other) {
size = other.size;
HashTable.resize(size);
for (int i = 0; i < size; ++i) {
for (const auto& entry : other.HashTable[i]) {
HashTable[i].push_back(new DataEntry(*entry));
}
}
}
inline void clear() {
for (auto& row : HashTable) {
for (auto& entry : row) {
delete entry;
}
row.clear();
}
HashTable.clear();
HashTable.shrink_to_fit();
///@link https://stackoverflow.com/questions/12587774/destructor-not-being-called
}
inline ~CovidDB() { //handles memory
clear();
}
/** @note Move constructor */
CovidDB(CovidDB&& other) noexcept {
size = other.size;
HashTable = std::move(other.HashTable);
other.size = 0;
}
/** @note Overloaded assigment operator*/
CovidDB& operator=(const CovidDB& other) {
if (this == &other) {
return *this; // Self-assignment
}
// clear the data and resources
for (auto& row : HashTable) {
for (auto& entry : row) {
delete entry;
}
row.clear();
}
HashTable.clear();
HashTable.shrink_to_fit();
// copy the data from the other object
size = other.size;
HashTable.resize(size);
for (int i = 0; i < size; ++i) {
for (const auto& entry : other.HashTable[i]) {
HashTable[i].push_back(new DataEntry(*entry));
}
}
return *this;
}
void rehash();
void display_table();
bool add(DataEntry*);
int re_hash_key(std::string);
int hash(std::string);
};
int CovidDB::re_hash_key(std::string country) {
int sum = 0;
int count = 0;
for (char c : country) {
sum = sum + ((count + 1) * c);
count++;
}
return sum % size; //returns the new hash
}
void CovidDB::rehash() {
auto is_prime = [](int num) {
if (num <= 1)
return false;
for (int i = 2; i <= std::sqrt(num); ++i) {
if (num % i == 0)
return false;
}
return true;
};
auto find_next_prime = [&is_prime](int num) {
while (true) {
if (is_prime(num))
return num;
++num;
}
};
int new_size = size * 2;
int new_prime_size = find_next_prime(new_size);
size = new_prime_size; //re-assign size
//CovidDB new_table(std::move(HashTable));//call move constructor, size 37
std::vector<std::vector<DataEntry*>> newHashTable(size); //temp object
newHashTable.resize(size);
// rehash all entries from the original table to the new table
for (std::vector<DataEntry*> row : HashTable) {
for (DataEntry* entry : row) {
int re_hash_i = re_hash_key(entry->get_country());
newHashTable[re_hash_i].push_back(entry);
}
}
// Clear the original table and update it with the new table
clear(); //predefined function that I removed due to MRE
HashTable = newHashTable;
std::cout << "SIZE: " << size << std::endl;
return;
}
void CovidDB::display_table() {
for(std::vector<DataEntry*> vec : HashTable) {
for(DataEntry* entry : vec) {
if (entry != nullptr) { //guard against dereferencing nullptr
std::cout << std::flush;
std::cout << "[Date: " << entry -> get_date() << "], "
<< "[Country: " << entry -> get_country() << "], "
<< "[Cases: " << entry -> get_c_cases() << "], "
<< "[Deaths: " << entry -> get_c_deaths() << "] "
<< std::endl;
}
}
}
std::cout << std::endl;
return;
}
int CovidDB::hash(std::string country) {
int sum = 0;
int count = 0;
for (char c : country) {
sum = sum + ((count + 1) * c);
count++;
}
return sum % size; //returns the hash
}
bool CovidDB::add(DataEntry* entry) {
int index = hash(entry->get_country());
HashTable[index].push_back(entry);
return true;
}
int main() {
CovidDB base;
DataEntry* data1 = new DataEntry();
DataEntry* data2 = new DataEntry();
data1->set_date("01/01/23");
data1->set_country("Slovenia");
data1->set_c_cases(1);
data1->set_c_deaths(1);
data2->set_date("02/02/23");
data2->set_country("Slovenia");
data2->set_c_cases(1);
data2->set_c_deaths(1);
// Add the entries to the CovidDB
base.add(data1);
base.add(data2);
base.display_table();
base.rehash();
base.display_table();
}
The main problem is my re-hash
method. My general idea was to:
Get the new size size * 2
and then next_prime(new_size)
to get the actual size, I used a couple of lambdas to achieve that. That part works since the new size is 37 as it should be.
I emailed my professor about my problem and she said I need to follow the rule of 5 so I made my move constructor and my overloaded operator. I don't believe the move constructor is my answer here since I don't want a copy of my original table, but I want to parse all of the existing entries, re-hash and add them to the table. I could be wrong.
Finally, I decided to create a new object CovidDB
and re-hash all of the. old/existing entries and push them into the new table. This sort of works but my compiler says there is no definition of the =
and the []
operator if I make a new object. If I make a new 2D std::vecotr<std::vector<DataEntry*>>
instead of an object that works Again I don't think that's the right solution.
The strange part to me is the fact that here:
bool CovidDB::add(DataEntry* entry) {
int index = hash(entry->get_country());
HashTable[index].push_back(entry);
return true;
}
I can index my HasHTable
object using the []
operator, but I can't do the same thing in my re_hash
function. I get this error:
error: no match for ‘operator[]’ (operand types are ‘CovidDB’ and ‘int’)
155 | new_table[re_hash_i].push_back(entry);
for this:
CovidDB new_table;
for (std::vector<DataEntry*> row : HashTable) {
for (DataEntry* entry : row) {
int re_hash_i = re_hash_key(entry->get_country());
new_table[re_hash_i].push_back(entry);
}
}
Can someone help me figure out where I potentially went wrong and why would an error be thrown for the []
operator in one case but not the other?
I tried rubber duck debugging the code, especially the operator but came up empty-handed. My debugger throws me into the std::vector
library.
Your problem is here:
// Clear the original table and update it with the new table
clear(); //predefined function that I removed due to MRE
This call to clear()
calls delete
on all the values.
You just spent a lot of time re-hashing the values and putting them into newHashTable
but you did not remove them from the old table HashTable
so when you call clear it goes and deletes the old values.
The solution is not to call clear()
. You overwite the original arrays on the next line:
HashTable = newHashTable;
Which removes all the storage associaetd with HashTable
. No other work is required.
Other Things to watch out for:
Using this->
is suspect to start with. The only need to use this->
is if you have to disambiguate a local from a member variable. This is called shadowing. The problem here is that when you forget this->
and simply use the variable the compiler can not detect this is incorrect so introducing a source of errors. I would prefer to make sure there is no shadowing in the first place. If there is no shadowing then there is no need for this->
this->date = "\0";
this->country = "\0";
There is no need to add the \0
into the string. The string literal ""
already contains the null terminator.
--
No need to mark these functions inline
.
inline void set_date(std::string set_date) { this->date = set_date;};
inline void set_country(std::string set_country) { this->country = set_country;};
inline void set_c_deaths(int set_c_deaths) { this->c_deaths = set_c_deaths;};
inline void set_c_cases(int set_c_cases) { this->c_cases = set_c_cases;};
Non modifying methods should be marked const
to let the compiler know these functions do not modify the object. Not it is quite common to pass objects by const reference
. In this situation only methods marked as const
can be called. So it is general good practive to make sure your object is const correct.
inline int get_c_cases() { return c_cases;};
inline int get_c_deaths() { return c_deaths;};
inline std::string get_date() { return date;};
inline std::string get_country() { return country;};
};
Sure this works:
/** @note Move constructor */
CovidDB(CovidDB&& other) noexcept {
size = other.size;
HashTable = std::move(other.HashTable);
other.size = 0;
}
But more standard is to use the initializer list:
CovidDB(CovidDB&& other) noexcept
: size(other.size)
, HashTable(std::move(other.HashTable))
{
other.size = 0;
}
I did it like that becuase you are not allowed to use STL. But I would take this one step further by using std::exchange
CovidDB(CovidDB&& other) noexcept
: size(std::exchange(other.size, 0))
, HashTable(std::move(other.HashTable))
{}
Your assignment operator is non optimal.
Sure you must be able to handle self assignment. BUT this is an extremely rare situation. You are pessimizing for the normal siutaiont (add an extra check) and optimizing for a situation that happens exceptionally rarely (if you have non normal situations that yes do this but don't make that assumption. Assume normal then optimize for that until you have measured). Optimize for what would be normal situations.
The standard way of doing the assignment operator is the copy and swap idiom. This has been expanded (not sure of the idiom name) to handle copy and move operations by passing by value.
/** @note Overloaded assigment operator*/
CovidDB& operator=(const CovidDB& other) {
if (this == &other) {
return *this; // Self-assignment
}
// clear the data and resources
for (auto& row : HashTable) {
for (auto& entry : row) {
delete entry;
}
row.clear();
}
HashTable.clear();
HashTable.shrink_to_fit();
// copy the data from the other object
size = other.size;
HashTable.resize(size);
for (int i = 0; i < size; ++i) {
for (const auto& entry : other.HashTable[i]) {
HashTable[i].push_back(new DataEntry(*entry));
}
}
return *this;
}
void rehash();
void display_table();
bool add(DataEntry*);
int re_hash_key(std::string);
int hash(std::string);
};
// ReWrite as:
/** @note Overloaded assigment operator*/
CovidDB& operator=(CovidDB other) noexcept. // Notice pass by value.
{
swap(other); // You will need to implement swap.
// left as an excercise.
return *this;
}
Note: When you pass by value. You are either doing a move
or a copy
construction. The move
is cheap. The copy may be expensive but does exactly the work you were doing in the code above. Then you simply swap the parameter and current state and let the destructor of the parameter handle clean up.
We already know a lot of primes. You don't need to go around re-computing them. Have a list of primes. You can start calculating once you have exhausted your list.
int CovidDB::re_hash_key(std::string country) {
int sum = 0;
int count = 0;
for (char c : country) {
sum = sum + ((count + 1) * c);
count++;
}
return sum % size; //returns the new hash
}
Fix this bug:
// Clear the original table and update it with the new table
clear(); //predefined function that I removed due to MRE
HashTable = newHashTable;
Don't manually print the DataEntry
object.
std::cout << "[Date: " << entry -> get_date() << "], "
<< "[Country: " << entry -> get_country() << "], "
<< "[Cases: " << entry -> get_c_cases() << "], "
<< "[Deaths: " << entry -> get_c_deaths() << "] "
<< std::endl;
The DataEntry
type should already know how to print itself.
std::cout << entry << std::endl;
You do this by implementing the output opertator.
std::ostream& operator<<(std::ostream& stream, DataEntry const& data)
{
data.print(stream); // implement the print method.
return stream;
}
That way you can get rid of unnecessary getter functions.
This is not a very good hash function. I suppose it is fine if all you are hashing are country names. But then there are only 160 (approx) contries in the world. You could pre-hash all their values :-) .
// Pass parameters by const reference to prevent a copy.
// Here you are making a copy of `country` each time this function
// is called but that is not required for a hash.
int CovidDB::hash(std::string country) {
int sum = 0;
int count = 0;
for (char c : country) {
sum = sum + ((count + 1) * c);
count++;
}
return sum % size; //returns the hash
}
I got a list of countries and hashed their values. Not a very even distribution.
index: 0 => 13
index: 1 => 12
index: 2 => 13
index: 3 => 9
index: 4 => 8
index: 5 => 13
index: 6 => 10
index: 7 => 19 // Hope I am not in this list.
index: 8 => 11
index: 9 => 10
index: 10 => 11
index: 11 => 8
index: 12 => 12
index: 13 => 11
index: 14 => 18
index: 15 => 7
index: 16 => 10