Search code examples
ctriecs50

Would someone care to identify what might cause a segfault in my code?


This is for my CS50 assignment. I have to make a spell-checker that loads a dictionary into trie data structure. The segfault doesn't happen until the file size increases.

//Trie structure
typedef struct node {

    bool is_word;
    struct node* children[27];
}node;

node* root;

// maximum length for a word
#define LENGTH 45
//global counter for total words in dic
int SIZE = 0;

Loads dictionary into memory.

bool load(const char* dictionary)
{
    //Open dictionary file && safecheck
    FILE* d_file = fopen(dictionary,"r");
    if(d_file == NULL) { return false; }

    //allocate space for the first node
    root = calloc(1,sizeof(node));

    //string buffer
    char word[SIZE+1];
    //loop through each word of the dictionary and store it to word
    while(fscanf(d_file,"%s\n",word)!=EOF){
        //navigation node steping further into trie
        node* nav = root;
        int word_length = strlen(word);
        //Iterate through each letter in a word
        for(int i=0;i<word_length;i++){
            int letter_pos = tolower(word[i]) - 'a';
            //check if letter node exists
            if(nav->children[letter_pos]==NULL){
                //create node for a letter
                node* l_node = calloc(1,sizeof(node));
                if(l_node == NULL) { return false; }
                nav->children[letter_pos] = l_node;
            }
            //proceed to the next node
            if(i < word_length-1)
                nav = nav->children[letter_pos];
        }
        //set word to true;
        if(nav->is_word != true){
            nav->is_word = true;
            // counter for total words in the dictionary
            SIZE++;
        }
    }
    fclose(d_file);
    return true;
}

Solution

  • I'm not sure if this is the main problem, but this part of the code is woefully lacking in robustness:

            int letter_pos = tolower(word[i]) - 'a';
    

    If letter_pos is not valid at this point (e.g. because word[i] is not a letter) then all the subsequent operations will result in UB.

    I would at the very least add an assert here:

            int letter_pos = tolower(word[i]) - 'a';
            assert(letter_pos >= 0 && letter_pos < sizeof(nav->children) / sizeof(nav->children[0]));
    

    Since your array size is 27 I'm guessing you wanted to use the last element for non-alphabet characters, so you might want to make the code more like this:

            int letter_pos;
            if (isalpha(word[i]))                    // if 'A'..'Z' or 'a'..'z'
                letter_pos = tolower(word[i]) - 'a'; // index = 0..25
            else
                letter_pos = 'z' - 'a' + 1;          // index = 26
            assert(letter_pos >= 0 && letter_pos < sizeof(nav->children) / sizeof(nav->children[0]));