I was working on the cs50 speller assignment, and once I finished debugging the load
function, the program just ran in an infinite loop without doing or printing anything.
// Implements a dictionary's functionality
#include <stdio.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// TODO: Choose number of buckets in hash table
const unsigned int N = 26 * LENGTH;
// Hash table
node *table[N];
unsigned int word_count;
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
node *tmp = table[hash(word)];
while (tmp != NULL)
{
if (strcasecmp(tmp->word, word) == 0)
{
return true;
}
tmp = tmp->next;
}
return false;
}
// Hashes word to a number
int hash(const char *word)
{
// TODO: Improve this hash function
int total = 0;
for (int i = 0; i < strlen(word); i++)
{
if (isalpha(word[i]))
{
total += (int)(tolower(word[i]) - 'a');
}
}
return total;
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
FILE *dict = fopen(dictionary, "r");
if (dict == NULL)
{
return false;
}
char *word = malloc(LENGTH + 1);
if (word == NULL)
{
return false;
}
while (fscanf(dict, "%s", word) == 1)
{
node *new = malloc(sizeof *new);
if (new == NULL)
{
return false;
}
strcpy(new->word, word);
if (table[hash(word)] == NULL)
{
table[hash(word)] = new;
}
else
{
new->next = table[hash(word)];
table[hash(word)]->next = new;
}
word_count++;
}
free(word);
free(dict);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
if (word_count)
{
return word_count;
}
else
{
return 0;
}
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
node *tmp;
node *cursor;
for (int i = 0; i < LENGTH * 25; i++)
{
tmp = table[i]->next;
cursor = table[i]->next;
while (tmp != NULL)
{
tmp = tmp->next;
free(cursor);
cursor = tmp;
}
}
return true;
}
I have had many errors with this problem, and once I fixed up load there was an infinite loop error in another function. I'm not sure how to pinpoint it- can anyone help?
The infinite loop prints out "MISPELLED WORDS" and then a few lines down it has the white cursor that appears in the console. I know that the program is still running because no $
sign appears. The program is not waiting for any sort of input because this project does not include any reason to use a cs50 get_xxx
function or scanf
.
These are only the helper functions (the assignment is only to implement them) and the rest is written for me, in a separate file:
// Implements a spell-checker
#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>
#include "dictionary.h"
// Undefine any definitions
#undef calculate
#undef getrusage
// Default dictionary
#define DICTIONARY "dictionaries/large"
// Prototype
double calculate(const struct rusage *b, const struct rusage *a);
int main(int argc, char *argv[])
{
// Check for correct number of args
if (argc != 2 && argc != 3)
{
printf("Usage: ./speller [DICTIONARY] text\n");
return 1;
}
// Structures for timing data
struct rusage before, after;
// Benchmarks
double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;
// Determine dictionary to use
char *dictionary = (argc == 3) ? argv[1] : DICTIONARY;
// Load dictionary
getrusage(RUSAGE_SELF, &before);
bool loaded = load(dictionary);
getrusage(RUSAGE_SELF, &after);
// Exit if dictionary not loaded
if (!loaded)
{
printf("Could not load %s.\n", dictionary);
return 1;
}
// Calculate time to load dictionary
time_load = calculate(&before, &after);
// Try to open text
char *text = (argc == 3) ? argv[2] : argv[1];
FILE *file = fopen(text, "r");
if (file == NULL)
{
printf("Could not open %s.\n", text);
unload();
return 1;
}
// Prepare to report misspellings
printf("\nMISSPELLED WORDS\n\n");
// Prepare to spell-check
int index = 0, misspellings = 0, words = 0;
char word[LENGTH + 1];
// Spell-check each word in text
char c;
while (fread(&c, sizeof(char), 1, file))
{
// Allow only alphabetical characters and apostrophes
if (isalpha(c) || (c == '\'' && index > 0))
{
// Append character to word
word[index] = c;
index++;
// Ignore alphabetical strings too long to be words
if (index > LENGTH)
{
// Consume remainder of alphabetical string
while (fread(&c, sizeof(char), 1, file) && isalpha(c));
// Prepare for new word
index = 0;
}
}
// Ignore words with numbers (like MS Word can)
else if (isdigit(c))
{
// Consume remainder of alphanumeric string
while (fread(&c, sizeof(char), 1, file) && isalnum(c));
// Prepare for new word
index = 0;
}
// We must have found a whole word
else if (index > 0)
{
// Terminate current word
word[index] = '\0';
// Update counter
words++;
// Check word's spelling
getrusage(RUSAGE_SELF, &before);
bool misspelled = !check(word);
getrusage(RUSAGE_SELF, &after);
// Update benchmark
time_check += calculate(&before, &after);
// Print word if misspelled
if (misspelled)
{
printf("%s\n", word);
misspellings++;
}
// Prepare for next word
index = 0;
}
}
// Check whether there was an error
if (ferror(file))
{
fclose(file);
printf("Error reading %s.\n", text);
unload();
return 1;
}
// Close text
fclose(file);
// Determine dictionary's size
getrusage(RUSAGE_SELF, &before);
unsigned int n = size();
getrusage(RUSAGE_SELF, &after);
// Calculate time to determine dictionary's size
time_size = calculate(&before, &after);
// Unload dictionary
getrusage(RUSAGE_SELF, &before);
bool unloaded = unload();
getrusage(RUSAGE_SELF, &after);
// Abort if dictionary not unloaded
if (!unloaded)
{
printf("Could not unload %s.\n", dictionary);
return 1;
}
// Calculate time to unload dictionary
time_unload = calculate(&before, &after);
// Report benchmarks
printf("\nWORDS MISSPELLED: %d\n", misspellings);
printf("WORDS IN DICTIONARY: %d\n", n);
printf("WORDS IN TEXT: %d\n", words);
printf("TIME IN load: %.2f\n", time_load);
printf("TIME IN check: %.2f\n", time_check);
printf("TIME IN size: %.2f\n", time_size);
printf("TIME IN unload: %.2f\n", time_unload);
printf("TIME IN TOTAL: %.2f\n\n",
time_load + time_check + time_size + time_unload);
// Success
return 0;
}
// Returns number of seconds between b and a
double calculate(const struct rusage *b, const struct rusage *a)
{
if (b == NULL || a == NULL)
{
return 0.0;
}
else
{
return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
(b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
(b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
/ 1000000.0);
}
}
I didn't write this other file, and I don't completely understand it (I'm not supposed to) but it's where the functions are actually called.
edit: thank you so much to Chqrlie for helping me out! Here is my updated code:
#include <stdio.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Hash table
node *table[N];
unsigned int word_count;
// Returns true if word is in dictionary, else false
bool check(const char *word)
{
node *tmp = table[hash(word)];
while (tmp != NULL)
{
if (strcasecmp(tmp -> word, word) == 0)
{
return true;
}
tmp = tmp -> next;
}
return false;
}
// Hashes word to a number
int hash(const char *word)
{
// TODO: Improve this hash function
int total = 0;
unsigned char c;
for (int i = 0, n = strlen(word); i < n; i++)
{
c = word[i];
if (isalpha(c))
{
total += (int) (tolower(c) - 'a');
}
}
if (total >= 0 && total <= N - 1)
{
return total;
}
else
{
return N - 1;
}
}
// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
FILE *dict = fopen(dictionary, "r");
if (dict == NULL)
{
return false;
}
char *word = malloc(LENGTH + 1);
if (word == NULL)
{
return false;
}
while (fscanf(dict, "%" STR(LENGTH) "s", word) == 1)
{
node *new = malloc(sizeof *new);
if (new == NULL)
{
return false;
}
strcpy(new->word, word);
if (table[hash(word)] == NULL)
{
table[hash(word)] = new;
new->next = NULL;
}
else
{
new -> next = table[hash(word)];
table[hash(word)] = new;
}
word_count++;
}
free(word);
free(dict);
return true;
}
// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
return word_count;
}
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void) {
for (int i = 0; i < N; i++) {
while (table[i]) {
node *tmp = table[i];
table[i] = tmp->next;
free(tmp);
}
}
return true;
}
I am only running into one error, and I genuinely have no idea how it's even possible. My program prints out everything it should (words misspelled, words in dictionary, etc) but at the end, It runs an infinite loop again - no $ sign appears and the white cursor just stays there forever. This is especially confusing because after all of those printf statements are done, the program should immediately return 0. Does anyone have any idea where this could be happening?
(note - the speller file is the same as it was before, and I think it must be where the error is happening.)
There are multiple problems in your code:
with N
defined as const unsigned int N = 26 * LENGTH;
you cannot define a global array of length N
: node *table[N];
does not conform to the C Standard. You should either write:
#define N (26 * LENGTH)
orfunction hash
should return an unsigned int
value, and you should make sure the return value is in the range 0
to N-1
.
the functions/macros isalpha
and tolower
are not defined for all char
values on platforms where char
is a signed type. You should either cast the char
argument as (unsigned char)
or use an unsigned char
variable to avoid the undefined behavior:
// Hashes word to a number
unsigned int hash(const char *word)
{
// TODO: Improve this hash function
unsigned int total = 0;
for (int i = 0; word[i]; i++)
{
unsigned char ch = word[i];
if (isalpha(ch))
{
total += tolower(ch) - 'a';
}
}
return total % N;
}
fscanf(dict, "%s", word)
will cause a buffer overflow if the input file has a word longer than LENGTH
. You should pass the maximum number of characters to store into the destination array before the null terminator this way:
#define STR(x) xSTR(x)
#define xSTR(x) #x
while (fscanf(dict, "%" STR(LENGTH) "s", word) == 1)
table[hash(word)]->next = new;
is causing an infinite loop because new->next
was set to table[hash(word)]
just before. Hence if there is a collision in the hash table, you will have a 2 element circular list at table[hash(word)]
. You should instead write table[hash(word)] = new
. Inserting new
in the hash table can be done without the test and the hash code should be computed only once:
strcpy(new->word, word);
unsigned int h = hash(word);
new->next = table[h];
table[h] = new;
the size
function should be simplified as:
unsigned int size(void) {
return word_count;
}
the unload
function is bogus too: you should iterate to i < N
and you should free all elements, starting at table[i]
instead of table[i]->next
:
// Unloads dictionary from memory, returning true if successful, else false
bool unload(void) {
for (int i = 0; i < N; i++) {
while (table[i]) {
node *tmp = table[i];
table[i] = tmp->next;
free(tmp);
}
}
return true;
}
I didn't write this other file, and I don't completely understand it (I'm not supposed to) but it's where the functions are actually called.
Good for you! The style of the main
function is not perfect and the loop char c; while (fread(&c, sizeof(char), 1, file))
instead of the classic int c; while ((c = getc(file)) != EOF)
is not recommended, especially when c
is to be passed to the character class functions defined in <ctype.h>
. Note also that the last word of the text file will not be checked if the file does not have a newline or another word separator after it.