Search code examples
clinked-listinfinite-loopcs50singly-linked-list

infinite loop in cs50 speller assignment


I was working on the cs50 speller assignment, and once I finished debugging the load function, the program just ran in an infinite loop without doing or printing anything.

// Implements a dictionary's functionality

#include <stdio.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#include "dictionary.h"

// Represents a node in a hash table
typedef struct node
{
    char word[LENGTH + 1];
    struct node *next;
}
node;

// TODO: Choose number of buckets in hash table
const unsigned int N = 26 * LENGTH;

// Hash table
node *table[N];

unsigned int word_count;

// Returns true if word is in dictionary, else false
bool check(const char *word)
{
    node *tmp = table[hash(word)];
    while (tmp != NULL)
    {
        if (strcasecmp(tmp->word, word) == 0)
        {
            return true;
        }
        tmp = tmp->next;
    }
    return false;
}

// Hashes word to a number
int hash(const char *word)
{
    // TODO: Improve this hash function
    int total = 0;
    for (int i = 0; i < strlen(word); i++)
    {
        if (isalpha(word[i]))
        {
            total += (int)(tolower(word[i]) - 'a');
        }
    }
    return total;
}

// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
    FILE *dict = fopen(dictionary, "r");
    if (dict == NULL)
    {
        return false;
    }
    char *word = malloc(LENGTH + 1);
    if (word == NULL)
    {
        return false;
    }
    while (fscanf(dict, "%s", word) == 1)
    {
        node *new = malloc(sizeof *new);
        if (new == NULL)
        {
            return false;
        }
        strcpy(new->word, word);
        if (table[hash(word)] == NULL)
        {
            table[hash(word)] = new;
        }
        else
        {
            new->next = table[hash(word)];
            table[hash(word)]->next = new;
        }
        word_count++;
    }
    free(word);
    free(dict);
    return true;
}

// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
    if (word_count)
    {
        return word_count;
    }
    else
    {
        return 0;
    }
}

// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
    node *tmp;
    node *cursor;
    for (int i = 0; i < LENGTH * 25; i++)
    {
        tmp = table[i]->next;
        cursor = table[i]->next;
        while (tmp != NULL)
        {
            tmp = tmp->next;
            free(cursor);
            cursor = tmp;
        }
    }
    return true;
}

I have had many errors with this problem, and once I fixed up load there was an infinite loop error in another function. I'm not sure how to pinpoint it- can anyone help?

The infinite loop prints out "MISPELLED WORDS" and then a few lines down it has the white cursor that appears in the console. I know that the program is still running because no $ sign appears. The program is not waiting for any sort of input because this project does not include any reason to use a cs50 get_xxx function or scanf.

These are only the helper functions (the assignment is only to implement them) and the rest is written for me, in a separate file:

// Implements a spell-checker

#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>

#include "dictionary.h"

// Undefine any definitions
#undef calculate
#undef getrusage

// Default dictionary
#define DICTIONARY "dictionaries/large"

// Prototype
double calculate(const struct rusage *b, const struct rusage *a);

int main(int argc, char *argv[])
{
    // Check for correct number of args
    if (argc != 2 && argc != 3)
    {
        printf("Usage: ./speller [DICTIONARY] text\n");
        return 1;
    }
    // Structures for timing data
    struct rusage before, after;

    // Benchmarks
    double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;

    // Determine dictionary to use
    char *dictionary = (argc == 3) ? argv[1] : DICTIONARY;
    // Load dictionary
    getrusage(RUSAGE_SELF, &before);
    bool loaded = load(dictionary);
    getrusage(RUSAGE_SELF, &after);

    // Exit if dictionary not loaded
    if (!loaded)
    {
        printf("Could not load %s.\n", dictionary);
        return 1;
    }

    // Calculate time to load dictionary
    time_load = calculate(&before, &after);

    // Try to open text
    char *text = (argc == 3) ? argv[2] : argv[1];
    FILE *file = fopen(text, "r");
    if (file == NULL)
    {
        printf("Could not open %s.\n", text);
        unload();
        return 1;
    }

    // Prepare to report misspellings
    printf("\nMISSPELLED WORDS\n\n");

    // Prepare to spell-check
    int index = 0, misspellings = 0, words = 0;
    char word[LENGTH + 1];

    // Spell-check each word in text
    char c;
    while (fread(&c, sizeof(char), 1, file))
    {
        // Allow only alphabetical characters and apostrophes
        if (isalpha(c) || (c == '\'' && index > 0))
        {
            // Append character to word
            word[index] = c;
            index++;

            // Ignore alphabetical strings too long to be words
            if (index > LENGTH)
            {
                // Consume remainder of alphabetical string
                while (fread(&c, sizeof(char), 1, file) && isalpha(c));

                // Prepare for new word
                index = 0;
            }
        }

        // Ignore words with numbers (like MS Word can)
        else if (isdigit(c))
        {
            // Consume remainder of alphanumeric string
            while (fread(&c, sizeof(char), 1, file) && isalnum(c));

            // Prepare for new word
            index = 0;
        }

        // We must have found a whole word
        else if (index > 0)
        {
            // Terminate current word
            word[index] = '\0';

            // Update counter
            words++;

            // Check word's spelling
            getrusage(RUSAGE_SELF, &before);
            bool misspelled = !check(word);
            getrusage(RUSAGE_SELF, &after);

            // Update benchmark
            time_check += calculate(&before, &after);

            // Print word if misspelled
            if (misspelled)
            {
                printf("%s\n", word);
                misspellings++;
            }

            // Prepare for next word
            index = 0;
        }
    }

    // Check whether there was an error
    if (ferror(file))
    {
        fclose(file);
        printf("Error reading %s.\n", text);
        unload();
        return 1;
    }

    // Close text
    fclose(file);

    // Determine dictionary's size
    getrusage(RUSAGE_SELF, &before);
    unsigned int n = size();
    getrusage(RUSAGE_SELF, &after);

    // Calculate time to determine dictionary's size
    time_size = calculate(&before, &after);

    // Unload dictionary
    getrusage(RUSAGE_SELF, &before);
    bool unloaded = unload();
    getrusage(RUSAGE_SELF, &after);

    // Abort if dictionary not unloaded
    if (!unloaded)
    {
        printf("Could not unload %s.\n", dictionary);
        return 1;
    }

    // Calculate time to unload dictionary
    time_unload = calculate(&before, &after);

    // Report benchmarks
    printf("\nWORDS MISSPELLED:     %d\n", misspellings);
    printf("WORDS IN DICTIONARY:  %d\n", n);
    printf("WORDS IN TEXT:        %d\n", words);
    printf("TIME IN load:         %.2f\n", time_load);
    printf("TIME IN check:        %.2f\n", time_check);
    printf("TIME IN size:         %.2f\n", time_size);
    printf("TIME IN unload:       %.2f\n", time_unload);
    printf("TIME IN TOTAL:        %.2f\n\n",
           time_load + time_check + time_size + time_unload);

    // Success
    return 0;
}

// Returns number of seconds between b and a
double calculate(const struct rusage *b, const struct rusage *a)
{
    if (b == NULL || a == NULL)
    {
        return 0.0;
    }
    else
    {
        return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
                  (b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
                 ((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
                  (b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
                / 1000000.0);
    }
}

I didn't write this other file, and I don't completely understand it (I'm not supposed to) but it's where the functions are actually called.

edit: thank you so much to Chqrlie for helping me out! Here is my updated code:

#include <stdio.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#include "dictionary.h"



// Represents a node in a hash table
typedef struct node
{
    char word[LENGTH + 1];
    struct node *next;
}
node;

// Hash table
node *table[N];

unsigned int word_count;

// Returns true if word is in dictionary, else false
bool check(const char *word)
{
    node *tmp = table[hash(word)];
    while (tmp != NULL)
    {
        if (strcasecmp(tmp -> word, word) == 0)
        {
            return true;
        }
        tmp = tmp -> next;
    }
    return false;
}

// Hashes word to a number
int hash(const char *word)
{
    // TODO: Improve this hash function
    int total = 0;
    unsigned char c;
    for (int i = 0, n = strlen(word); i < n; i++)
    {
        c = word[i];
        if (isalpha(c))
        {
            total += (int) (tolower(c) - 'a');
        }
    }
    if (total >= 0 && total <= N - 1)
    {
        return total;
    }
    else
    {
        return N - 1;
    }
}

// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
    FILE *dict = fopen(dictionary, "r");
    if (dict == NULL)
    {
        return false;
    }
    char *word = malloc(LENGTH + 1);
    if (word == NULL)
    {
        return false;
    }
    while (fscanf(dict, "%" STR(LENGTH) "s", word) == 1)
    {
        node *new = malloc(sizeof *new);
        if (new == NULL)
        {
            return false;
        }
        strcpy(new->word, word);
        if (table[hash(word)] == NULL)
        {
            table[hash(word)] = new;
            new->next = NULL;
        }
        else
        {
            new -> next = table[hash(word)];
            table[hash(word)] = new;
        }
        word_count++;
    }
    free(word);
    free(dict);
    return true;
}

// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
    return word_count;
}

// Unloads dictionary from memory, returning true if successful, else false
bool unload(void) {
    for (int i = 0; i < N; i++) {
        while (table[i]) {
            node *tmp = table[i];
            table[i] = tmp->next;
            free(tmp);
        }
    }
    return true;
}

I am only running into one error, and I genuinely have no idea how it's even possible. My program prints out everything it should (words misspelled, words in dictionary, etc) but at the end, It runs an infinite loop again - no $ sign appears and the white cursor just stays there forever. This is especially confusing because after all of those printf statements are done, the program should immediately return 0. Does anyone have any idea where this could be happening?

(note - the speller file is the same as it was before, and I think it must be where the error is happening.)


Solution

  • There are multiple problems in your code:

    • with N defined as const unsigned int N = 26 * LENGTH; you cannot define a global array of length N: node *table[N]; does not conform to the C Standard. You should either write:

      • #define N (26 * LENGTH) or
      • enum { N = 26 * LENGTH };`
    • function hash should return an unsigned int value, and you should make sure the return value is in the range 0 to N-1.

    • the functions/macros isalpha and tolower are not defined for all char values on platforms where char is a signed type. You should either cast the char argument as (unsigned char) or use an unsigned char variable to avoid the undefined behavior:

      // Hashes word to a number
      unsigned int hash(const char *word)
      {
          // TODO: Improve this hash function
          unsigned int total = 0;
          for (int i = 0; word[i]; i++)
          {
              unsigned char ch = word[i];
              if (isalpha(ch))
              {
                  total += tolower(ch) - 'a';
              }
          }
          return total % N;
      }
      
    • fscanf(dict, "%s", word) will cause a buffer overflow if the input file has a word longer than LENGTH. You should pass the maximum number of characters to store into the destination array before the null terminator this way:

      #define STR(x)  xSTR(x)
      #define xSTR(x) #x
      
      while (fscanf(dict, "%" STR(LENGTH) "s", word) == 1)
      
    • table[hash(word)]->next = new; is causing an infinite loop because new->next was set to table[hash(word)] just before. Hence if there is a collision in the hash table, you will have a 2 element circular list at table[hash(word)]. You should instead write table[hash(word)] = new. Inserting new in the hash table can be done without the test and the hash code should be computed only once:

          strcpy(new->word, word);
          unsigned int h = hash(word);
          new->next = table[h];
          table[h] = new;
      
    • the size function should be simplified as:

      unsigned int size(void) {
          return word_count;
      }
      
    • the unload function is bogus too: you should iterate to i < N and you should free all elements, starting at table[i] instead of table[i]->next:

      // Unloads dictionary from memory, returning true if successful, else false
      bool unload(void) {
          for (int i = 0; i < N; i++) {
              while (table[i]) {
                  node *tmp = table[i];
                  table[i] = tmp->next;
                  free(tmp);
              }
          }
          return true;
      }
      

    I didn't write this other file, and I don't completely understand it (I'm not supposed to) but it's where the functions are actually called.

    Good for you! The style of the main function is not perfect and the loop char c; while (fread(&c, sizeof(char), 1, file)) instead of the classic int c; while ((c = getc(file)) != EOF) is not recommended, especially when c is to be passed to the character class functions defined in <ctype.h>. Note also that the last word of the text file will not be checked if the file does not have a newline or another word separator after it.