Search code examples
clinked-listsegmentation-faulthashtablecs50

Segmentation fault in CS50 Speller. Why?


I am working on the CS50 pset5 Speller, and I keep getting a segmentation fault error. Debug50 suggests the problem is the line n->next = table[index]; in the implementation of the load function, line 110. I tried to revise but I can´t figure out why it would give error. Here below my code, can anyone please help me?

// Implements a dictionary's functionality

#include <stdbool.h>
#include <strings.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "dictionary.h"

// Represents a node in a hash table
typedef struct node {
    char word[LENGTH + 1];
    struct node *next;
} node;

// Number of buckets in hash table
const unsigned int N = 150000;

// Nodes counter
int nodes_counter = 0;

// Hash table
node *table[N];

// Returns true if word is in dictionary, else false
bool check(const char *word)
{
    // TODO
    int hash_value = hash(word);
    node *cursor = malloc(sizeof(node));
    if (cursor != NULL)
    {
        cursor = table[hash_value];
    }

    if (strcasecmp(cursor->word, word) == 0) // If word is first item in linked list
    {
        return 0;
    }
    else // Iterate over the list by moving the cursor
    {
        while (cursor->next != NULL)
        {
            if (strcasecmp(cursor->word, word) == 0) // If word is found
            {
                return 0;
            }
            else
            {
                cursor = cursor->next;
            }
        }
    }
    return false;
}

// Hashes word to a number
unsigned int hash(const char *word)
{
    // Adaptation of FNV function, source https://www.programmingalgorithms.com/algorithm/fnv-hash/c/
    const unsigned int fnv_prime = 0x811C9DC5;
    unsigned int hash = 0;
    unsigned int i = 0;

    for (i = 0; i < strlen(word); i++)
    {
        hash *= fnv_prime;
        hash ^= (*word);
    }

    return hash;
}

// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
    // Open Dictionary File (argv[1] or dictionary?)
    FILE *file = fopen(dictionary, "r");
    if (file == NULL)
    {
        printf("Could not open file\n");
        return 1;
    }
    // Read until end of file word by word (store word to read in word = (part of node)?)

    char word[LENGTH + 1];

    while(fscanf(file, "%s", word) != EOF)
    {
        // For each word, create a new node
        node *n = malloc(sizeof(node));
        if (n != NULL)
        {
            strcpy(n->word, word);
            //Omitted to avoid segmentation fault n->next = NULL;
            nodes_counter++;
        }
        else
        {
            return 2;
        }

        // Call hash function (input: word --> output: int)
        int index = hash(word);

        // Insert Node into Hash Table
        n->next = table[index];
        table[index] = n;
    }
    return false;
}

// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
    // Return number of nodes created in Load
    if (nodes_counter > 0)
    {
        return nodes_counter;
    }

    return 0;
}

// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
    // TODO
    for (int i = 0; i < N; i++)
    {
        node *cursor = table[i];
        while (cursor->next != NULL)
        {
            node *tmp = cursor;
            cursor = cursor->next;
            free(tmp);
        }
    }
    return false;
}

Solution

  • There are multiple problems in your code:

    • node *table[N]; cannot be only be defined as a global object if N is a constant expression. N is defined as a const unsigned int, but N is not a constant expression in C (albeit it is in C++). Your program compiles only because the compiler accepts this as a non portable extension. Either use a macro or an enum.
    • you overwrite cursor as soon as it is allocated in check(). There is no need to allocate a node in this function.
    • the hash() function should produce the same hash for words that differ only in case.
    • the hash() function only uses the first letter in word.
    • the hash() function can return a hash value >= N.
    • fscanf(file, "%s", word) should be protected agains buffer overflow.
    • you do not check if cursor is non null before dereferencing it in unload()

    Here is a modified version:

    // Implements a dictionary's functionality
    
    #include <ctype.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <strings.h>
    
    #include "dictionary.h"
    
    // Represents a node in a hash table
    typedef struct node {
        char word[LENGTH + 1];
        struct node *next;
    } node;
    
    // Number of buckets in hash table
    enum { N = 150000 };
    
    // Nodes counter
    int nodes_counter = 0;
    
    // Hash table
    node *table[N];
    
    // Returns true if word is in dictionary, else false
    bool check(const char *word) {
        int hash_value = hash(word);
    
        // Iterate over the list by moving the cursor
        for (node *cursor = table[hash_value]; cursor; cursor = cursor->next) {
            if (strcasecmp(cursor->word, word) == 0) {
                // If word is found
                return true;
            }
        }
        // If word is not found
        return false;
    }
    
    // Hashes word to a number
    unsigned int hash(const char *word) {
        // Adaptation of FNV function, source https://www.programmingalgorithms.com/algorithm/fnv-hash/c/
        unsigned int fnv_prime = 0x811C9DC5;
        unsigned int hash = 0;
    
        for (unsigned int i = 0; word[i] != '\0'; i++) {
            hash *= fnv_prime;
            hash ^= toupper((unsigned char)word[i]);
        }
        return hash % N;
    }
    
    // Loads dictionary into memory, returning true if successful, else a negative error number
    int load(const char *dictionary) {
        // Open Dictionary File (argv[1] or dictionary?)
        FILE *file = fopen(dictionary, "r");
        if (file == NULL) {
            printf("Could not open file\n");
            return -1;
        }
        // Read until end of file word by word (store word to read in word = (part of node)?)
    
        char word[LENGTH + 1];
        char format[10];
        // construct the conversion specifier to limit the word size
        //    read by fscanf()
        snprintf(format, sizeof format, "%%%ds", LENGTH);
    
        while (fscanf(file, format, word) == 1) {
            // For each word, create a new node
            node *n = malloc(sizeof(node));
            if (n == NULL) {
                fclose(file);
                return -2;
            }
            strcpy(n->word, word);
            n->next = NULL;
            nodes_counter++;
    
            // Call hash function (input: word --> output: int)
            int index = hash(word);
    
            // Insert Node into Hash Table
            n->next = table[index];
            table[index] = n;
        }
        fclose(file);
        return true;
    }
    
    // Returns number of words in dictionary if loaded, else 0 if not yet loaded
    unsigned int size(void) {
        // Return number of nodes created in Load
        return nodes_counter;
    }
    
    // Unloads dictionary from memory, returning true if successful, else false
    bool unload(void) {
        for (int i = 0; i < N; i++) {
            node *cursor = table[i];
            table[i] = NULL;
            while (cursor != NULL) {
                node *tmp = cursor;
                cursor = cursor->next;
                free(tmp);
            }
        }
        return true;
    }