Search code examples
cpointershashtable

Broken Pointer in My HashTable or I'm Missing Something?


Here's my code,

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct DF
{
    char str[101];
    int D;
    struct DF *next;
} DF;
DF *df[5];

int hash (char str[])
{
    int sum=0, len=strlen (str);
    for (int x=0; x<len; x++) sum+=str[x];
    
    return sum%5;
}

DF* ND (char str[])
{
    DF *node=(DF*) malloc (sizeof (DF));
    strcpy (node->str, str); node->D=1;
    node->next=NULL;
    
    return node;
}

void add (char str[])
{
    int idx=hash (str);
    if (df[idx])
    {
        DF *temp=df[idx];
        while (temp) temp=temp->next;
        temp=ND (str);
    }
    else df[idx]=ND (str);
}

int main (void)
{
    char str1[]="The"; add (str1);
    char str2[]="App"; add (str2);
    
    if (df[4])
    {
        printf ("[4] %s", df[4]->str);
        DF *temp=df[4]->next;
        while (temp)
        {
            printf (" -> %s", temp->str);
            temp=temp->next;
        }
        puts ("");
    }
    
    return 0;
}

Please pay attention to void add (char[]), why the output is not [4] The -> App? Even when I changed DF *temp=df[idx]; to DF *temp=df[idx]->next; it makes no difference. But if I change it the function to this,

void add (char str[])
{
    int idx=hash (str);
    if (df[idx])
    {
        DF *temp=df[idx];
        while (temp->next) temp=temp->next;
        temp->next=ND (str);
    }
    else df[idx]=ND (str);
}

It prints out [4] The -> App. So, what's the difference between those 2 algorithms?


Solution

  • in the first way :

    temp=ND (str);
    

    just assign a local variable, so it has no impact outside the function out of the fact you have a memory leak (but the list is not modified, the element is not added)

    but in the second way :

    temp->next=ND (str);
    

    modifies the linked list

    To work you can modify the first way to do :

    void add (char str[])
    {
        int idx=hash (str);
        if (df[idx])
        {
            DF **temp=&df[idx];
            while (*temp) temp=&(*temp)->next;
            *temp=ND (str);
        }
        else df[idx]=ND (str);
    }
    

    but this is complicated for nothing except if you want to remove the if :

    void add (char str[])
    {
        DF ** temp=&df[hash(str)];
        
        while (*temp)
          temp=&(*temp)->next;
        *temp=ND (str);
    }
    

    Note to add a new cell at the end of the list of synonym is useless, you do not have global order, you can directly do :

    void add (char str[])
    {
        DF * temp=ND (str);
        int idx=hash (str);
    
        temp->next = df[idx];
        df[idx] = temp;
    }
    

    In ND :

    strcpy (node->str, str); node->D=1;
    

    is dangerous because str can be too long to be saved in node->str, you can use strncpy

    At contrarian when the string to save is small you lost memory.

    What about to not use an array for the field str but a char* and duplicate the string (strdupfor instance) ?

    In hash you go through the string two times, you do not need to compute strlen and can use

    for (int x=0; str[x] != 0; x++) sum+=str[x];