Search code examples
cvalgrindcs50freeze

CS50 Speller Pset working with valgrind but not separately


I'm doing the Pset5 of CS50, "speller", where you have to create a spelling checking program that loads a dictionary into memory, and hashes the words into a hash table. Then it reads a text file, compares it to the dictionary word for word, and give a result of all the misspellings. It then unloads all the memory used, before finishing.

I have implemented 5 functions, (load, hash, check, size and unload) to do all the steps. Now the weird problem is when I'm checking the program with valgrind for leaks it's working as it should (slow but working) and no memory leaks. When I try to run the program without valgrind, it just keeps loading and doesn't give out a result.

Running debug50 give the same frozen outcome. I have tried watching a video about how to solve this Pset and it's similar to what I did, but somehow theirs work. (I don't want to copy paste blindly because that's not learning). I have read some posts here about the same Pset5 speller program but none have faced my unique situation. I haven't been able to pinpoint what's the problem. I'm hoping someone would point out an obvious code flaw that's causing it to freeze.

Here is my code:

dictionary.c:

// Implements a dictionary's functionality

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <string.h>
#include "dictionary.h"

// Represents a node in a hash table
typedef struct node
{
    char word[LENGTH + 1];
    struct node *next;
}
node;

// TODO: Choose number of buckets in hash table
const unsigned int N = 26;
unsigned int total_number = 0;
// Hash table
node *table[N];

// Returns true if word is in dictionary, else false
bool check(const char *word)
{
    // TODO
    int index = hash(word);
    node *temp = table[index];
    while (temp != NULL)
    {
        if (strcasecmp(temp->word, word) == 0)
        {
            return true;
        }
        else
        {
            temp = temp->next;
        }
    }
    free(temp);
    return false;
}

// Hashes word to a number
unsigned int hash(const char *word)
{
    // TODO: Improve this hash function
    unsigned int index = 0;
    if (strlen(word) > 1)
    {
        unsigned int index1 = toupper(word[0]) - 'A';
        unsigned int index2 = toupper(word[1]) - 'A';
        index = index1 + index2;
    }
    else
    {
        index = toupper(word[0]) - 'A';
    }
    index = index % N;
    return index;
}

// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
    // TODO
    // open dictionary
    FILE *file = fopen(dictionary, "r");
    if (file == NULL)
    {
        printf("couldn't load dictionary\n");
        return false;
    }
    // read word by word
    char word[LENGTH + 1];
    while (fscanf(file, "%s", word) != EOF)
    {
        int index = hash(word);
        node *newnode = malloc(sizeof(node));
        if (newnode == NULL)
        {
            printf("couldn't read file\n");
            return false;
        }
        strcpy(newnode->word, word);
        newnode->next = table[index];
        table[index] = newnode;
        total_number ++;
        free(newnode);
    }
    fclose(file);
    return true;
}

// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
    // TODO
    return total_number;
}

// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
    // TODO
    // iterate through the hash buckets
    for (int i = 0; i < N; i++)
    {
        // only free if bucket is used
        if (table[i] != NULL)
        {
            // setting up a moving cursor and
            node *tmp = table[i];
            node *cursor = table[i];
            while(cursor != NULL)
            {
                cursor = cursor->next;
                free(tmp);
                tmp = cursor;
            }
            free(cursor);
        }
    }
    return true;
}

dictionary.h:

// Declares a dictionary's functionality

#ifndef DICTIONARY_H
#define DICTIONARY_H

#include <stdbool.h>

// Maximum length for a word
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
#define LENGTH 45

// Prototypes
bool check(const char *word);
unsigned int hash(const char *word);
bool load(const char *dictionary);
unsigned int size(void);
bool unload(void);

#endif // DICTIONARY_H

This part is provided as part of the problem, and calls my code:

speller.c:

// Implements a spell-checker

#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>

#include "dictionary.h"

// Undefine any definitions
#undef calculate
#undef getrusage

// Default dictionary
#define DICTIONARY "dictionaries/large"

// Prototype
double calculate(const struct rusage *b, const struct rusage *a);

int main(int argc, char *argv[])
{
    // Check for correct number of args
    if (argc != 2 && argc != 3)
    {
        printf("Usage: ./speller [DICTIONARY] text\n");
        return 1;
    }

    // Structures for timing data
    struct rusage before, after;

    // Benchmarks
    double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;

    // Determine dictionary to use
    char *dictionary = (argc == 3) ? argv[1] : DICTIONARY;

    // Load dictionary
    getrusage(RUSAGE_SELF, &before);
    bool loaded = load(dictionary);
    getrusage(RUSAGE_SELF, &after);

    // Exit if dictionary not loaded
    if (!loaded)
    {
        printf("Could not load %s.\n", dictionary);
        return 1;
    }

    // Calculate time to load dictionary
    time_load = calculate(&before, &after);

    // Try to open text
    char *text = (argc == 3) ? argv[2] : argv[1];
    FILE *file = fopen(text, "r");
    if (file == NULL)
    {
        printf("Could not open %s.\n", text);
        unload();
        return 1;
    }

    // Prepare to report misspellings
    printf("\nMISSPELLED WORDS\n\n");

    // Prepare to spell-check
    int index = 0, misspellings = 0, words = 0;
    char word[LENGTH + 1];

    // Spell-check each word in text
    char c;
    while (fread(&c, sizeof(char), 1, file))
    {
        // Allow only alphabetical characters and apostrophes
        if (isalpha(c) || (c == '\'' && index > 0))
        {
            // Append character to word
            word[index] = c;
            index++;

            // Ignore alphabetical strings too long to be words
            if (index > LENGTH)
            {
                // Consume remainder of alphabetical string
                while (fread(&c, sizeof(char), 1, file) && isalpha(c));

                // Prepare for new word
                index = 0;
            }
        }

        // Ignore words with numbers (like MS Word can)
        else if (isdigit(c))
        {
            // Consume remainder of alphanumeric string
            while (fread(&c, sizeof(char), 1, file) && isalnum(c));

            // Prepare for new word
            index = 0;
        }

        // We must have found a whole word
        else if (index > 0)
        {
            // Terminate current word
            word[index] = '\0';

            // Update counter
            words++;

            // Check word's spelling
            getrusage(RUSAGE_SELF, &before);
            bool misspelled = !check(word);
            getrusage(RUSAGE_SELF, &after);

            // Update benchmark
            time_check += calculate(&before, &after);

            // Print word if misspelled
            if (misspelled)
            {
                printf("%s\n", word);
                misspellings++;
            }

            // Prepare for next word
            index = 0;
        }
    }

    // Check whether there was an error
    if (ferror(file))
    {
        fclose(file);
        printf("Error reading %s.\n", text);
        unload();
        return 1;
    }

    // Close text
    fclose(file);

    // Determine dictionary's size
    getrusage(RUSAGE_SELF, &before);
    unsigned int n = size();
    getrusage(RUSAGE_SELF, &after);

    // Calculate time to determine dictionary's size
    time_size = calculate(&before, &after);

    // Unload dictionary
    getrusage(RUSAGE_SELF, &before);
    bool unloaded = unload();
    getrusage(RUSAGE_SELF, &after);

    // Abort if dictionary not unloaded
    if (!unloaded)
    {
        printf("Could not unload %s.\n", dictionary);
        return 1;
    }

    // Calculate time to unload dictionary
    time_unload = calculate(&before, &after);

    // Report benchmarks
    printf("\nWORDS MISSPELLED:     %d\n", misspellings);
    printf("WORDS IN DICTIONARY:  %d\n", n);
    printf("WORDS IN TEXT:        %d\n", words);
    printf("TIME IN load:         %.2f\n", time_load);
    printf("TIME IN check:        %.2f\n", time_check);
    printf("TIME IN size:         %.2f\n", time_size);
    printf("TIME IN unload:       %.2f\n", time_unload);
    printf("TIME IN TOTAL:        %.2f\n\n",
           time_load + time_check + time_size + time_unload);

    // Success
    return 0;
}

// Returns number of seconds between b and a
double calculate(const struct rusage *b, const struct rusage *a)
{
    if (b == NULL || a == NULL)
    {
        return 0.0;
    }
    else
    {
        return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
                  (b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
                 ((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
                  (b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
                / 1000000.0);
    }
}

You will also need a "make file" that glues all of them together to run the program as below:

speller:
    clang -ggdb3 -gdwarf-4 -O0 -Qunused-arguments -std=c11 -Wall -Werror -Wextra -Wno-gnu-folding-constant -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wshadow -c -o speller.o speller.c
    clang -ggdb3 -gdwarf-4 -O0 -Qunused-arguments -std=c11 -Wall -Werror -Wextra -Wno-gnu-folding-constant -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wshadow -c -o dictionary.o dictionary.c
    clang -ggdb3 -gdwarf-4 -O0 -Qunused-arguments -std=c11 -Wall -Werror -Wextra -Wno-gnu-folding-constant -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wshadow -o speller speller.o dictionary.o -lm

Solution

  • The problem is in your load() function. You have this:

    while (fscanf(file, "%s", word) != EOF)
        {
            int index = hash(word);
            node *newnode = malloc(sizeof(node));
            if (newnode == NULL)
            {
                printf("couldn't read file\n");
                return false;
            }
            strcpy(newnode->word, word);
            newnode->next = table[index];
            table[index] = newnode;
            total_number ++;
            free(newnode); // BUG here
        }
    

    For each word read from the dictionary, you allocate memory for a new node using malloc(), and copy the word into it. You then link this new node into the linked list in the correct bucket.

    You then call free(newnode). This un-allocates the memory for your new word, and makes it available to be allocated again. Now the pointer to the word in your linked list no longer points to allocated memory, it points to freed memory. (This is called a dangling pointer.) This will cause undefined behaviour if you use that pointer (which you do).

    The solution is to not call free(newnode) in load().

    As for why it "works" when you use valgrind - well, I don't think it does. When I run your original code with valgrind, it reports numerous errors. With my suggested fix, it reports none. That your original code (with valgrind) gives you the expected output, well, that is one possible outcome of undefined behaviour. It doesn't always mean your code will malfunction. Often it will appear to work (until you make some other innocent change, and then it all blows up.)

    Aside: You have the following in check():

        while (temp != NULL)
        {
            // Some code
        }
        free(temp);
    

    After your while() loop finishes, temp is equal to NULL. You should not be calling free(NULL). Most systems will accept it and do nothing, but it is very weird and confusing. (You do the same thing with free(cursor); in unload().)