I'm doing the Pset5 of CS50, "speller", where you have to create a spelling checking program that loads a dictionary into memory, and hashes the words into a hash table. Then it reads a text file, compares it to the dictionary word for word, and give a result of all the misspellings. It then unloads all the memory used, before finishing.
I have implemented 5 functions, (load, hash, check, size and unload) to do all the steps. Now the weird problem is when I'm checking the program with valgrind
for leaks it's working as it should (slow but working) and no memory leaks. When I try to run the program without valgrind, it just keeps loading and doesn't give out a result.
Running debug50 give the same frozen outcome. I have tried watching a video about how to solve this Pset and it's similar to what I did, but somehow theirs work. (I don't want to copy paste blindly because that's not learning). I have read some posts here about the same Pset5 speller program but none have faced my unique situation. I haven't been able to pinpoint what's the problem. I'm hoping someone would point out an obvious code flaw that's causing it to freeze.
Here is my code:
dictionary.c:
// Implements a dictionary's functionality
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <string.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// TODO: Choose number of buckets in hash table
const unsigned int N = 26;
unsigned int total_number = 0;
// Hash table
node *table[N];
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
// TODO
int index = hash(word);
node *temp = table[index];
while (temp != NULL)
{
if (strcasecmp(temp->word, word) == 0)
{
return true;
}
else
{
temp = temp->next;
}
}
free(temp);
return false;
}
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
unsigned int index = 0;
if (strlen(word) > 1)
{
unsigned int index1 = toupper(word[0]) - 'A';
unsigned int index2 = toupper(word[1]) - 'A';
index = index1 + index2;
}
else
{
index = toupper(word[0]) - 'A';
}
index = index % N;
return index;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
// TODO
// open dictionary
FILE *file = fopen(dictionary, "r");
if (file == NULL)
{
printf("couldn't load dictionary\n");
return false;
}
// read word by word
char word[LENGTH + 1];
while (fscanf(file, "%s", word) != EOF)
{
int index = hash(word);
node *newnode = malloc(sizeof(node));
if (newnode == NULL)
{
printf("couldn't read file\n");
return false;
}
strcpy(newnode->word, word);
newnode->next = table[index];
table[index] = newnode;
total_number ++;
free(newnode);
}
fclose(file);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
// TODO
return total_number;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
// TODO
// iterate through the hash buckets
for (int i = 0; i < N; i++)
{
// only free if bucket is used
if (table[i] != NULL)
{
// setting up a moving cursor and
node *tmp = table[i];
node *cursor = table[i];
while(cursor != NULL)
{
cursor = cursor->next;
free(tmp);
tmp = cursor;
}
free(cursor);
}
}
return true;
}
dictionary.h:
// Declares a dictionary's functionality
#ifndef DICTIONARY_H
#define DICTIONARY_H
#include <stdbool.h>
// Maximum length for a word
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
#define LENGTH 45
// Prototypes
bool check(const char *word);
unsigned int hash(const char *word);
bool load(const char *dictionary);
unsigned int size(void);
bool unload(void);
#endif // DICTIONARY_H
This part is provided as part of the problem, and calls my code:
speller.c:
// Implements a spell-checker
#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>
#include "dictionary.h"
// Undefine any definitions
#undef calculate
#undef getrusage
// Default dictionary
#define DICTIONARY "dictionaries/large"
// Prototype
double calculate(const struct rusage *b, const struct rusage *a);
int main(int argc, char *argv[])
{
// Check for correct number of args
if (argc != 2 && argc != 3)
{
printf("Usage: ./speller [DICTIONARY] text\n");
return 1;
}
// Structures for timing data
struct rusage before, after;
// Benchmarks
double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;
// Determine dictionary to use
char *dictionary = (argc == 3) ? argv[1] : DICTIONARY;
// Load dictionary
getrusage(RUSAGE_SELF, &before);
bool loaded = load(dictionary);
getrusage(RUSAGE_SELF, &after);
// Exit if dictionary not loaded
if (!loaded)
{
printf("Could not load %s.\n", dictionary);
return 1;
}
// Calculate time to load dictionary
time_load = calculate(&before, &after);
// Try to open text
char *text = (argc == 3) ? argv[2] : argv[1];
FILE *file = fopen(text, "r");
if (file == NULL)
{
printf("Could not open %s.\n", text);
unload();
return 1;
}
// Prepare to report misspellings
printf("\nMISSPELLED WORDS\n\n");
// Prepare to spell-check
int index = 0, misspellings = 0, words = 0;
char word[LENGTH + 1];
// Spell-check each word in text
char c;
while (fread(&c, sizeof(char), 1, file))
{
// Allow only alphabetical characters and apostrophes
if (isalpha(c) || (c == '\'' && index > 0))
{
// Append character to word
word[index] = c;
index++;
// Ignore alphabetical strings too long to be words
if (index > LENGTH)
{
// Consume remainder of alphabetical string
while (fread(&c, sizeof(char), 1, file) && isalpha(c));
// Prepare for new word
index = 0;
}
}
// Ignore words with numbers (like MS Word can)
else if (isdigit(c))
{
// Consume remainder of alphanumeric string
while (fread(&c, sizeof(char), 1, file) && isalnum(c));
// Prepare for new word
index = 0;
}
// We must have found a whole word
else if (index > 0)
{
// Terminate current word
word[index] = '\0';
// Update counter
words++;
// Check word's spelling
getrusage(RUSAGE_SELF, &before);
bool misspelled = !check(word);
getrusage(RUSAGE_SELF, &after);
// Update benchmark
time_check += calculate(&before, &after);
// Print word if misspelled
if (misspelled)
{
printf("%s\n", word);
misspellings++;
}
// Prepare for next word
index = 0;
}
}
// Check whether there was an error
if (ferror(file))
{
fclose(file);
printf("Error reading %s.\n", text);
unload();
return 1;
}
// Close text
fclose(file);
// Determine dictionary's size
getrusage(RUSAGE_SELF, &before);
unsigned int n = size();
getrusage(RUSAGE_SELF, &after);
// Calculate time to determine dictionary's size
time_size = calculate(&before, &after);
// Unload dictionary
getrusage(RUSAGE_SELF, &before);
bool unloaded = unload();
getrusage(RUSAGE_SELF, &after);
// Abort if dictionary not unloaded
if (!unloaded)
{
printf("Could not unload %s.\n", dictionary);
return 1;
}
// Calculate time to unload dictionary
time_unload = calculate(&before, &after);
// Report benchmarks
printf("\nWORDS MISSPELLED: %d\n", misspellings);
printf("WORDS IN DICTIONARY: %d\n", n);
printf("WORDS IN TEXT: %d\n", words);
printf("TIME IN load: %.2f\n", time_load);
printf("TIME IN check: %.2f\n", time_check);
printf("TIME IN size: %.2f\n", time_size);
printf("TIME IN unload: %.2f\n", time_unload);
printf("TIME IN TOTAL: %.2f\n\n",
time_load + time_check + time_size + time_unload);
// Success
return 0;
}
// Returns number of seconds between b and a
double calculate(const struct rusage *b, const struct rusage *a)
{
if (b == NULL || a == NULL)
{
return 0.0;
}
else
{
return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
(b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
(b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
/ 1000000.0);
}
}
You will also need a "make file" that glues all of them together to run the program as below:
speller:
clang -ggdb3 -gdwarf-4 -O0 -Qunused-arguments -std=c11 -Wall -Werror -Wextra -Wno-gnu-folding-constant -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wshadow -c -o speller.o speller.c
clang -ggdb3 -gdwarf-4 -O0 -Qunused-arguments -std=c11 -Wall -Werror -Wextra -Wno-gnu-folding-constant -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wshadow -c -o dictionary.o dictionary.c
clang -ggdb3 -gdwarf-4 -O0 -Qunused-arguments -std=c11 -Wall -Werror -Wextra -Wno-gnu-folding-constant -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wshadow -o speller speller.o dictionary.o -lm
The problem is in your load()
function. You have this:
while (fscanf(file, "%s", word) != EOF)
{
int index = hash(word);
node *newnode = malloc(sizeof(node));
if (newnode == NULL)
{
printf("couldn't read file\n");
return false;
}
strcpy(newnode->word, word);
newnode->next = table[index];
table[index] = newnode;
total_number ++;
free(newnode); // BUG here
}
For each word read from the dictionary, you allocate memory for a new node
using malloc()
, and copy the word into it. You then link this new node
into the linked list in the correct bucket.
You then call free(newnode)
. This un-allocates the memory for your new word, and makes it available to be allocated again. Now the pointer to the word in your linked list no longer points to allocated memory, it points to freed memory. (This is called a dangling pointer.) This will cause undefined behaviour if you use that pointer (which you do).
The solution is to not call free(newnode)
in load()
.
As for why it "works" when you use valgrind
- well, I don't think it does. When I run your original code with valgrind, it reports numerous errors. With my suggested fix, it reports none. That your original code (with valgrind) gives you the expected output, well, that is one possible outcome of undefined behaviour. It doesn't always mean your code will malfunction. Often it will appear to work (until you make some other innocent change, and then it all blows up.)
Aside: You have the following in check()
:
while (temp != NULL)
{
// Some code
}
free(temp);
After your while()
loop finishes, temp
is equal to NULL
. You should not be calling free(NULL)
. Most systems will accept it and do nothing, but it is very weird and confusing. (You do the same thing with free(cursor);
in unload()
.)