Search code examples
cstrcat

Why am I having this issue with strcat in C?


I am entirely new to C and I have this code as shown below:

//Initialize Global array variable title_char_cnt_array to char[26]
char title_char_cnt_array[26];
//Initialize SearchTitle array
char *SearchTitle[6] = { "uuul", "dule", "speed", "spede", "deul", "cars" };

char *Compute_HashMapKey_for_each_title(char title[]) {
    //char *T;
    //T = title;
    Get_Individual_Character_Frequency_For_Each_Title(title);
    //Initialize alphabets array to char and assign a to z
    //Initialize HashMapKey to char
    char *HashMapKy;
    HashMapKy = malloc(sizeof(char));
    char default_str[] = { "0#" };
    char str[50];
    char ash[] = { "#" };
    int i;
    //Create HashMapKey by appending 0# to HashMapKey if a particular alphabet isn't in title or   count_of_alphabet plus # if the alphabet is in title (the format is   1#0#1#0#1#0#0#0#0#0#0#0#0#0#1#0#0#2#0#2 ... #1#0#0# for this title: Extractor for e.g)
    for (i = 0; i < 26; i++) {
        if (title_char_cnt_array[i] == 0) {
            //HashMapKey = HashMapKey . '0#';
            strcat(HashMapKy, default_str);
        } else {
            //First convert character frequency count to string
            sprintf(str, "%d", title_char_cnt_array[i]);
            //concat # to str
            strcat(str, ash);
            //HashMapKey =. title_char_cnt_array[i].'#';
            strcat(HashMapKy, str);
        }//end if
    }//End For
    return HashMapKy;
}

void Get_Individual_Character_Frequency_For_Each_Title(char Title[]) {
    int Title_Size;
    //get size of Title
    Title_Size = strlen(Title);
    int x, j;
    //Initialize current_char to char for character comparison in title_array
    char current_char;
    char Alphabets[][1] =  { "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z" };
    //Foreach character in title array, calculate individual character frequency here
    for (x = 0; x < Title_Size; x++) {
        //Initialize current_char_cnt to 0
        int current_char_cnt = 1;
        current_char = *(Title + x);//retrieve each character of Title array
        for (j = x; j < Title_Size; j++) {
            if (current_char == *(Title + j + 1)) {
                //increment current_char_cnt
                current_char_cnt++;
            }//End If
        }
        /*Insert current_char_cnt in title_char_cnt_array against index - index no. represents position of char in alphabetical order*/
        if (current_char == Alphabets[0][0])
            if (title_char_cnt_array[0] == 0)//if no previous value inserted, then insert
                title_char_cnt_array[0] = current_char_cnt;
        if (current_char == Alphabets[1][0])
            if (title_char_cnt_array[1] == 0)
                title_char_cnt_array[1] = current_char_cnt;
        if (current_char == Alphabets[2][0])
            if (title_char_cnt_array[2] == 0)
                title_char_cnt_array[2] = current_char_cnt;
        if (current_char == Alphabets[3][0])
            if (title_char_cnt_array[3] == 0)
                title_char_cnt_array[3] = current_char_cnt;
        if (current_char == Alphabets[4][0])
            if (title_char_cnt_array[4] == 0)
                title_char_cnt_array[4] = current_char_cnt;
        if (current_char == Alphabets[5][0])
            if (title_char_cnt_array[5] == 0)
                title_char_cnt_array[5] = current_char_cnt;
        if (current_char == Alphabets[6][0])
            if (title_char_cnt_array[6] == 0)
                title_char_cnt_array[6] = current_char_cnt;
        if (current_char == Alphabets[7][0])
            if (title_char_cnt_array[7] == 0)
                title_char_cnt_array[7] = current_char_cnt;
        if (current_char == Alphabets[8][0])
            if (title_char_cnt_array[8] == 0)
                title_char_cnt_array[8] = current_char_cnt;
        if (current_char == Alphabets[9][0])
            if (title_char_cnt_array[9] == 0)
                title_char_cnt_array[9] = current_char_cnt;
        if (current_char == Alphabets[10][0])
            if (title_char_cnt_array[10] == 0)
                title_char_cnt_array[10] = current_char_cnt;
        if (current_char == Alphabets[11][0])
            if (title_char_cnt_array[11] == 0)
                title_char_cnt_array[11] = current_char_cnt;
        if (current_char == Alphabets[12][0])
            if (title_char_cnt_array[12] == 0)
                title_char_cnt_array[12] = current_char_cnt;
        if (current_char == Alphabets[13][0])
            if (title_char_cnt_array[13] == 0)
                title_char_cnt_array[13] = current_char_cnt;
        if (current_char == Alphabets[14][0])
            if (title_char_cnt_array[14] == 0)
                title_char_cnt_array[14] = current_char_cnt;
        if (current_char == Alphabets[15][0])
            if (title_char_cnt_array[15] == 0)
                title_char_cnt_array[15] = current_char_cnt;
        if (current_char == Alphabets[16][0])
            if (title_char_cnt_array[16] == 0)
                title_char_cnt_array[16] = current_char_cnt;
        if (current_char == Alphabets[17][0])
            if (title_char_cnt_array[17] == 0)
                title_char_cnt_array[17] = current_char_cnt;
        if (current_char == Alphabets[18][0])
            if (title_char_cnt_array[18] == 0)
                title_char_cnt_array[18] = current_char_cnt;
        if (current_char == Alphabets[19][0])
            if (title_char_cnt_array[19] == 0)
                title_char_cnt_array[19] = current_char_cnt;
        if (current_char == Alphabets[20][0])
            if (title_char_cnt_array[20] == 0)
                title_char_cnt_array[20] = current_char_cnt;
        if (current_char == Alphabets[21][0])
            if (title_char_cnt_array[21] == 0)
                title_char_cnt_array[21] = current_char_cnt;
        if (current_char == Alphabets[22][0])
            if (title_char_cnt_array[22] == 0)
                title_char_cnt_array[22] = current_char_cnt;
        if (current_char == Alphabets[23][0])
            if (title_char_cnt_array[23] == 0)
                title_char_cnt_array[23] = current_char_cnt;
        if (current_char == Alphabets[24][0])
            if (title_char_cnt_array[24] == 0)
                title_char_cnt_array[24] = current_char_cnt;
        if (current_char == Alphabets[25][0])
            if (title_char_cnt_array[25] == 0)
                title_char_cnt_array[25] = current_char_cnt;
    }//End For
}

int main() {
    int i;
    //Foreach element in title_char_cnt_array, assign an in value of -1
    for (i = 0; i < 26; i++)
        title_char_cnt_array[i] = 0;

    //Initialize HashMap array
    struct node *HashMap[9];
    int HM_indx = 0;
    for (HM_indx = 0; HM_indx < 9; HM_indx++)
        HashMap[HM_indx] = NULL;
    int sz = 6; //6 titles to insert in hashmap
    //Initialize SearchTitle array
    //char *SearchTitle[6] = { "uuul", "dule", "speed", "spede", "deul", "cars" };

    char *HMKey;
    int x;
    for (x = 0; x < sz; x++) {
        HMKey = Compute_HashMapKey_for_each_title(SearchTitle[x]);
        //printf("%s\n", HMKey);
        Insert_Title_To_HashMap_Using_HashMapKey(x, HMKey, HashMap);
    }

    free(HMKey);
    return 0;
}

What I expect Compute_HashMapKey_for_each_title(SearchTitle[x]) to return to HMKey in main function is:

HashMapKey="0#0#0#0#0#0#0#0#0#0#0#1#0#0#0#0#0#0#0#0#3#0#0#0#0#0#"

But what I see instead using the debugger is:

HashMapKey=0xb41700 "À", '«' <repeats 16 times>, "þîþîþîþîþîþîþîþ0#0#0#0#0#0#0#0#0#0#0#1#0#0#0#0#0#0#0#0#3#0#0#0#0#0#"

As you can see from the main function, once I get HMKey; I pass it to the function Insert_Title_To_HashMap_Using_HashMapKey(x, HMKey, HashMap) and then break up my expected string: "0#0#0#0#0#0#0#0#0#0#0#1#0#0#0#0#0#0#0#0#3#0#0#0#0#0#" into single characters, typecast each character to its ASCII number and sum then up. So I expect to see 0 as the first character, # as the second character, 0 as the third character and so on, but what I see as the first character using the debugger is À which is the first character after the address in the return value of Compute_HashMapKey_for_each_title(SearchTitle[x]) to return to HMKey.

Even my expected strlen in this case should be 52 but instead what I see is: 84.

I have no idea what's going on, please any help will be highly appreciated.


Solution

  • There is a major problem in Compute_HashMapKey_for_each_title: you do not allocate enough space for the hash string, malloc(sizeof(char)) allocates a single byte.

    The string can be as large as 26 times 5 plus the null terminator (5 bytes if title_char_cnt_array is an array of char, but 12 if you change that to int). I suggest using a local array and returning an allocated copy using strdup():

    char *Compute_HashMapKey_for_each_title(char title[]) {
        char buf[26 * 12 + 1];
        int pos = 0;
        Get_Individual_Character_Frequency_For_Each_Title(title);
        // Create HashMapKey by appending the number of occurrences for each character
        //  followed by a `#` char.
        for (int i = 0; i < 26; i++) {
            pos += sprintf(buf + pos, "%d#", title_char_cnt_array[i]);
        }
        return strdup(buf);
    }
    

    The function Get_Individual_Character_Frequency_For_Each_Title can be greatly simplified:

    void Get_Individual_Character_Frequency_For_Each_Title(char Title[]) {
        const char *alphabet = "abcdefghijklmnopqrstuvwyxz";
        for (int i = 0; i < 26; i++) {
            title_char_cnt_array[i] = 0;
        }
        for (int i = 0; Title[i] != '\0'; i++) {
            char *p = strchr(alphabet, Title[i]);
            if (p != NULL) {
                title_char_cnt_array[p - alphabet] += 1;
            }
        }
    }
    

    If you can assume the system uses thee ASCII character set, the function can be even simpler:

    void Get_Individual_Character_Frequency_For_Each_Title(char Title[]) {
        for (int i = 0; i < 26; i++) {
            title_char_cnt_array[i] = 0;
        }
        for (int i = 0; Title[i] != '\0'; i++) {
            char c = Title[i];
            if (c >= 'a' && c <= 'z') {
                title_char_cnt_array[c - 'a'] += 1;
            }
        }
    }
    

    Note that title_char_cnt_array should be passed as an argument and should have an integer type larger than char.