This is for my CS50 assignment. I have to make a spell-checker that loads a dictionary into trie data structure. The segfault doesn't happen until the file size increases.
//Trie structure
typedef struct node {
bool is_word;
struct node* children[27];
}node;
node* root;
// maximum length for a word
#define LENGTH 45
//global counter for total words in dic
int SIZE = 0;
Loads dictionary into memory.
bool load(const char* dictionary)
{
//Open dictionary file && safecheck
FILE* d_file = fopen(dictionary,"r");
if(d_file == NULL) { return false; }
//allocate space for the first node
root = calloc(1,sizeof(node));
//string buffer
char word[SIZE+1];
//loop through each word of the dictionary and store it to word
while(fscanf(d_file,"%s\n",word)!=EOF){
//navigation node steping further into trie
node* nav = root;
int word_length = strlen(word);
//Iterate through each letter in a word
for(int i=0;i<word_length;i++){
int letter_pos = tolower(word[i]) - 'a';
//check if letter node exists
if(nav->children[letter_pos]==NULL){
//create node for a letter
node* l_node = calloc(1,sizeof(node));
if(l_node == NULL) { return false; }
nav->children[letter_pos] = l_node;
}
//proceed to the next node
if(i < word_length-1)
nav = nav->children[letter_pos];
}
//set word to true;
if(nav->is_word != true){
nav->is_word = true;
// counter for total words in the dictionary
SIZE++;
}
}
fclose(d_file);
return true;
}
I'm not sure if this is the main problem, but this part of the code is woefully lacking in robustness:
int letter_pos = tolower(word[i]) - 'a';
If letter_pos
is not valid at this point (e.g. because word[i]
is not a letter) then all the subsequent operations will result in UB.
I would at the very least add an assert
here:
int letter_pos = tolower(word[i]) - 'a';
assert(letter_pos >= 0 && letter_pos < sizeof(nav->children) / sizeof(nav->children[0]));
Since your array size is 27 I'm guessing you wanted to use the last element for non-alphabet characters, so you might want to make the code more like this:
int letter_pos;
if (isalpha(word[i])) // if 'A'..'Z' or 'a'..'z'
letter_pos = tolower(word[i]) - 'a'; // index = 0..25
else
letter_pos = 'z' - 'a' + 1; // index = 26
assert(letter_pos >= 0 && letter_pos < sizeof(nav->children) / sizeof(nav->children[0]));