I'm trying to implement the Harvard CS50's problem set five: A spell checker in C.
My program itself does work. I do however, keep encountering memory allocation errors. I have used Valgrind in an attempt to find out where the error is exactly, but with no success. My teaching fellows at my own university have also looked at the code, but could not determine the source of the error as well unfortunately.
The first block of code below implements functionalities for another program which does the actual spell checking. The second is the Valgrind Log. In the third block, the helper file is shown. Finally, the main spell checker is also shown (for completeness).
// Implements a dictionary's functionality
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <strings.h>
// The struct 'node' is defined here
#include "dictionary.h"
// defining SIZE as a constant 26
#define SIZE 26
// Implementing the amount of 'buckets'
node *hashtable[SIZE];
// Implementing the hash function
int hash_function(char* word)
{
unsigned int hash = toupper(word[0]) - 'A';
return hash % SIZE;
}
// Implementing the counter for 'size'
int number = 0;
// Returns true if word is in dictionary else false
bool check(const char *word)
{
// Converts the word that is going to be checked from a const char* to a modifiable string
// Naming word to be checked 'wtbc' and converting it to all lower case letters
int length = strlen(word) + 1;
char wtbc[length];
for (int i = 0; i < (length - 1); i++)
{
wtbc[i] = tolower(word[i]);
}
wtbc[(length - 1)] = '\0';
// Calls on the hashfunction to output the 'bucket' where the wtbc is compared to the dictionary
node* cursor = hashtable[hash_function(wtbc)];
int result = 0;
while (cursor != NULL)
{
result = strcasecmp(cursor->word, wtbc);
if (result == 0)
{
return true;
}
cursor = cursor -> next;
}
return false;
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
// Making sure each 'head / bucket' points to NULL, using a variable named ptn (pointer to null)
for (int ptn = 0; ptn < SIZE; ptn++)
{
hashtable[ptn] = NULL;
}
// Iterate over each word in the dictionary
// Open the file (FILE I/O FUNCTIONS)
FILE *input = fopen(dictionary, "r");
if (input == NULL)
{
fprintf(stderr, "Could not open dictionary.\n");
return 1;
}
// Converting the const char *dictionary to a usable string
char dictionaryword[(LENGTH + 1)];
// Each new word should become a new node
while (fscanf(input, "%s", dictionaryword) != EOF)
{
// Hash the word: insert into hash table
int bucket = hash_function(dictionaryword);
// Creating a new node
node *new_node = malloc(sizeof(node));
if (new_node == NULL)
{
unload();
return false;
}
// Copy the new word into the new node
strcpy(new_node->word, dictionaryword);
// Creates a new node for the new word if the current head node is pointing to NULL
if (hashtable[bucket] == NULL)
{
// Pointing the head (hashtable[bucket] to point to the first new node)
hashtable[bucket] = new_node;
}
else
{
// Make sure the node points to the first node in the bucket
new_node->next = hashtable[bucket];
hashtable[bucket] = new_node;
}
number++;
}
fclose(input);
return true;
}
// Returns number of words in dictionary if loaded else 0 if not yet loaded
unsigned int size(void)
{
return number;
}
// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
for (int j = 0; j < SIZE; j++)
{
node* cursorfree = hashtable[j];
while (cursorfree)
{
// create a temporary node to save the position of the next node
node* temp = cursorfree;
// free the current node
cursorfree = cursorfree->next;
free(temp);
}
}
return true;
}
The following is the Valgrind log:
==3226== Memcheck, a memory error detector
==3226== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
==3226== Using Valgrind-3.10.1 and LibVEX; rerun with -h for copyright info
==3226== Command: ./speller texts/lalaland.txt
==3226==
MISSPELLED WORDS
==3226== Conditional jump or move depends on uninitialised value(s)
==3226== at 0x4228DC: check (dictionary.c:46)
==3226== by 0x421443: main (speller.c:112)
==3226== Uninitialised value was created by a heap allocation
==3226== at 0x4C2AB80: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==3226== by 0x422C23: load (dictionary.c:89)
==3226== by 0x420A72: main (speller.c:40)
==3226==
Chazelle
L
TECHNO
L
Thelonious
Prius
MIA
L
MIA
...
Sebastian's
Mia
Mia
Mia
MIA
Mia
Mia
Mia
Sebastian's
L
==3226== Conditional jump or move depends on uninitialised value(s)
==3226== at 0x4231FC: unload (dictionary.c:132)
==3226== by 0x421644: main (speller.c:152)
==3226== Uninitialised value was created by a heap allocation
==3226== at 0x4C2AB80: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==3226== by 0x422C23: load (dictionary.c:89)
==3226== by 0x420A72: main (speller.c:40)
==3226==
WORDS MISSPELLED: 955
WORDS IN DICTIONARY: 143091
WORDS IN TEXT: 17756
TIME IN load: 2.07
TIME IN check: 140.60
TIME IN size: 0.00
TIME IN unload: 0.22
TIME IN TOTAL: 142.88
==3226==
==3226== HEAP SUMMARY:
==3226== in use at exit: 0 bytes in 0 blocks
==3226== total heap usage: 143,093 allocs, 143,093 frees, 8,014,232 bytes allocated
==3226==
==3226== All heap blocks were freed -- no leaks are possible
==3226==
==3226== For counts of detected and suppressed errors, rerun with: -v
==3226== ERROR SUMMARY: 981 errors from 2 contexts (suppressed: 0 from 0)
My node struct is defined in the following file:
// Declares a dictionary's functionality
#ifndef DICTIONARY_H
#define DICTIONARY_H
#include <stdbool.h>
// Maximum length for a word
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
#define LENGTH 46
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
node* head;
// Prototypes
bool check(const char *word);
bool load(const char *dictionary);
unsigned int size(void);
bool unload(void);
#endif // DICTIONARY_H
For completeness, here is the main spell checker program:
// Implements a spell-checker
#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>
#include "dictionary.h"
// Undefine any definitions
#undef calculate
#undef getrusage
// Default dictionary
#define DICTIONARY "dictionaries/large"
// Prototype
double calculate(const struct rusage *b, const struct rusage *a);
int main(int argc, char *argv[])
{
// Check for correct number of args
if (argc != 2 && argc != 3)
{
printf("Usage: speller [dictionary] text\n");
return 1;
}
// Structures for timing data
struct rusage before, after;
// Benchmarks
double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;
// Determine dictionary to use
char *dictionary = (argc == 3) ? argv[1] : DICTIONARY;
// Load dictionary
getrusage(RUSAGE_SELF, &before);
bool loaded = load(dictionary);
getrusage(RUSAGE_SELF, &after);
// Exit if dictionary not loaded
if (!loaded)
{
printf("Could not load %s.\n", dictionary);
return 1;
}
// Calculate time to load dictionary
time_load = calculate(&before, &after);
// Try to open text
char *text = (argc == 3) ? argv[2] : argv[1];
FILE *file = fopen(text, "r");
if (file == NULL)
{
printf("Could not open %s.\n", text);
unload();
return 1;
}
// Prepare to report misspellings
printf("\nMISSPELLED WORDS\n\n");
// Prepare to spell-check
int index = 0, misspellings = 0, words = 0;
char word[LENGTH + 1];
// Spell-check each word in text
for (int c = fgetc(file); c != EOF; c = fgetc(file))
{
// Allow only alphabetical characters and apostrophes
if (isalpha(c) || (c == '\'' && index > 0))
{
// Append character to word
word[index] = c;
index++;
// Ignore alphabetical strings too long to be words
if (index > LENGTH)
{
// Consume remainder of alphabetical string
while ((c = fgetc(file)) != EOF && isalpha(c));
// Prepare for new word
index = 0;
}
}
// Ignore words with numbers (like MS Word can)
else if (isdigit(c))
{
// Consume remainder of alphanumeric string
while ((c = fgetc(file)) != EOF && isalnum(c));
// Prepare for new word
index = 0;
}
// We must have found a whole word
else if (index > 0)
{
// Terminate current word
word[index] = '\0';
// Update counter
words++;
// Check word's spelling
getrusage(RUSAGE_SELF, &before);
bool misspelled = !check(word);
getrusage(RUSAGE_SELF, &after);
// Update benchmark
time_check += calculate(&before, &after);
// Print word if misspelled
if (misspelled)
{
printf("%s\n", word);
misspellings++;
}
// Prepare for next word
index = 0;
}
}
// Check whether there was an error
if (ferror(file))
{
fclose(file);
printf("Error reading %s.\n", text);
unload();
return 1;
}
// Close text
fclose(file);
// Determine dictionary's size
getrusage(RUSAGE_SELF, &before);
unsigned int n = size();
getrusage(RUSAGE_SELF, &after);
// Calculate time to determine dictionary's size
time_size = calculate(&before, &after);
// Unload dictionary
getrusage(RUSAGE_SELF, &before);
bool unloaded = unload();
getrusage(RUSAGE_SELF, &after);
// Abort if dictionary not unloaded
if (!unloaded)
{
printf("Could not unload %s.\n", dictionary);
return 1;
}
// Calculate time to unload dictionary
time_unload = calculate(&before, &after);
// Report benchmarks
printf("\nWORDS MISSPELLED: %d\n", misspellings);
printf("WORDS IN DICTIONARY: %d\n", n);
printf("WORDS IN TEXT: %d\n", words);
printf("TIME IN load: %.2f\n", time_load);
printf("TIME IN check: %.2f\n", time_check);
printf("TIME IN size: %.2f\n", time_size);
printf("TIME IN unload: %.2f\n", time_unload);
printf("TIME IN TOTAL: %.2f\n\n",
time_load + time_check + time_size + time_unload);
// Success
return 0;
}
// Returns number of seconds between b and a
double calculate(const struct rusage *b, const struct rusage *a)
{
if (b == NULL || a == NULL)
{
return 0.0;
}
else
{
return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
(b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
(b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
/ 1000000.0);
}
}
Sorry for the long post. I was not sure how much code was relevant to solving this problem, and I thought it would be best to upload as much as possible. I've been stuck on this problem for a while now. If anyone has any idea what is causing this error I would be very grateful!
You are leaving the next
field uninitialised when there's no hash collision. This leads to undefined behaviour later on when next
is examined.
malloc
isn't doing any zero initialisation for you. You must assign NULL
to next
.
In fact this entire fragment
// Creates a new node for the new word if the current head node is pointing to NULL
if (hashtable[bucket] == NULL)
{
// Pointing the head (hashtable[bucket] to point to the first new node)
hashtable[bucket] = new_node;
new_node->next = NULL; // <--- this is missing
}
else
{
// Make sure the node points to the first node in the bucket
new_node->next = hashtable[bucket];
hashtable[bucket] = new_node;
}
can be reduced to
new_node->next = hashtable[bucket];
hashtable[bucket] = new_node;
which always does the right thing, whether or not hashtable[bucket]
contains NULL
.