Search code examples
chashhash-collision

How can I resolve the collision in the hashing in this code I did? Currently cannot search for NG CHEA YEAT's ID only


I have the following text file

1171203258:HOSSAIN, MARUF
1181202660:KUHAN RAJ A/L TAMIL CHEL WAM
1181203465:PONG KAI SUN
1191102443:FAIZA OSAMA ABDALLA HASHIM
1201302289:LEE JIA WEI
1201302368:SHEIKH, AHNAF AZMAIN
1201100584:HI CHIA LING
1201101509:NG CHEA YEAT
1191103201:PHUAH CHEE HAOU
1201100879:MOSTAFA ARABY MADBOULY AHMED
1191103215:TONG JUN YANG
1191103119:ANG QIZHENG
1171302286:DARWIN KUMAR A/L MUNIAN
1181101192:HAIZUN NAJWA BINTI MOHD RIFIN
1201100926:NG XUE NIE
1191302417:ALMARHOON, ALI HUSSAIN A
1201100225:HEMAN RAO A/L SUBRAMANIAM
1181100823:LIM ZHEN BANG
1161202587:SOHEIL PRAKASAN SUPPAN
1201100603:AVINASH MURALI
1181101858:CHEAH KOK YEW
1191103071:GAN WEI TONG
1201100301:KEVIN THAM ZHENG YIT
1201100648:LIM CHER AIK
1201302222:SHIVAA RUTRAN A/L NAGATHEESAN
1201100779:TAN WEI XIANG
1191100919:WONG HONG WEI

The code I have for now, work well but have collision in the hashing I think

Here is what I have so far:

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define MDIR 27 //size of list
#define MBUFF 256
#define MHASH 109 //hash function is %109
#define MNAME 40 



struct List{
    char name[40];
    int studID;
};

//function prototype
int comparator(const void* p, const void* q){
    return strcmp(((struct List*)p)->name,((struct List*)q)->name);
}
int readData(struct List dir[]);  
int hashfunc(char *name);
void hash(struct List dir[], int ndir,
int hashtable[]);

int search(char *key,
struct List s[], int hashtable[]);

//main function
int main(){
    int ndir, result, hashtable[MHASH];
    int count;
    int i;
    int j;
    struct List s[27];
    char temp[27];
    char query[40];
    
    FILE *fptr;
    fptr = fopen("rec.txt", "r+");
    if (fptr != NULL) {
    printf("File created successfully!\n");
  }
  else {
    printf("Failed to create the file.\n");
    // exit status for OS that an error occurred
    return -1;
  }
  
   for(count = 0; count < 27; count++){
       fscanf(fptr,"%d", &s[count].studID);
       fgets(s[count].name,40,fptr);
       
   }
      

qsort

   qsort(s,27,sizeof(struct List),comparator);

printing the sorted name then continue the hashing of searching

            //printing sorted name
            printf("Sorted Names\n");
            for(i=0;i<27;i++){
                printf("%d%s\n", i+1, s[i].name);
                }
                fclose(fptr);

hashing of searching part

ndir=readData(s);
hash(s,ndir,hashtable);
puts("\nName to search>>");
fgets(query,MNAME-1,stdin);
query[strlen(query)-1]='\0';
result=search(query,s,hashtable);
if(result==-1)
printf("Not Found");
else
printf("%s's ID is %d\n",
s[result].name, s[result].studID);
    
                return 0;
        }

read function

int readData(struct List dir[]){

FILE *fdir=fopen("rec.txt","r");
char buff[MBUFF];
int i=0;
while(i<MDIR && fgets(buff,MBUFF-1,fdir)){
dir[i].studID=atol(strtok(buff,":"));   
strcpy(dir[i].name,strtok(NULL, "\n"));
i++;
}
return(i);
}

hash function

int hashfunc(char *name){
long sum=0;
int k=0;
while(name[k]){
sum+=name[k];
k++;
}
return( (int) (sum % MHASH) );
}

hash function

void hash(struct List dir[], int ndir,
int hashtable[]){
int k;
int index;
for(k=0;k<ndir;k++){
index = hashfunc(dir[k].name);
hashtable[index]=k;
}
}

search function

int search(char *key, struct List dir[],
int hashtable[]){
int index=hashfunc(key);
int k=hashtable[index];
if(strcmp(key,dir[k].name)==0)
return(k);
else
return(-1);

}

I am not sure for the hashing of searching part


Solution

  • Whenever faced with a need to separate fields in a line of data, the normal approach is to read an entire line of data as a string into a buffer (character array). Then you separate what you need from the buffer using whatever method fits the data the best. Either using a pair of pointers to bracket the text you need and then copying the characters between the pointers. You can automate the process using string functions like strchr() to locate the ':' in the buffer. You can also use string functions like strtok() to split the buffer into tokens on any given set of delimiters.

    However here there is an even simpler method. Since you have a fixed format for the studID and name in the line, you can simply use sscanf(), e.g.

    #include <stdio.h>
    #include <stdlib.h>
    
    #define MXSTUD 30       /* if you need a constant, #define one (or more) */
    #define MXNAME 40
    
    typedef struct list {   /* adding typedef for convenience */
      char name[MXNAME];
      unsigned studID;
    } list;
    
    ...
    
    int main (int argc, char **argv) {
      
      int count = 0;                      /* count of students */
      char buf[MXNAME * 2];               /* temprorary storage for line */
      list s[MXSTUD] = {{ .name = "" }};  /* list array initialized all 0 */
      /* open filename given as 1st argument or "rec.text" if none given */
      FILE *fptr = fopen (argc > 1 ? argv[1] : "rec.text", "r");
      
      if (!fptr) {  /* validate file open for reading */
        fputs ("error: file open failed\n", stderr);
        return 1;
      }
      
      while (fgets (buf, sizeof buf, fptr)) { /* read each line into buf */
        /* separate studID and name using sscanf() */
        if (sscanf (buf, "%u:%39[^\n]", &s[count].studID, s[count].name) == 2) {
          count += 1;   /* increment count on success */
        }
      }
      ...
    

    That's all that is needed to read each line of data and separate the line into studID and name storing each in an element of the list array of struct.

    Use qsort() For Sorting

    Regardless of whether you have an array or allocated block of memory containing objects, qsort() provides a simple and efficient way to sort it. All you need to do is write a compare() function telling qsort() how to compare the elements. The declaration for the qsort() compare function is:

    int compare (const void *a, const void *b);`
    

    Where a and b are simple pointers-to elements of your array to be compared. So when writing the function, all you need to do is cast a and b to the proper type and write the logic to compare whatever you like in the two elements. A negative return means a sorts before b and a positive return means b sorts before a. A zero return means the elements are equal.

    Casting the a and b to type const list * (you include const since the data isn't modified which allows the compiler freedom to optimize more fully), you simply loop over each name comparing characters and returning when two characters differ or the end of file is reached. Here, to sort your s[] array by name you can do:

    /* qsort compare function lexagraphically sorts words */
    int compare (const void *a, const void *b)
    {
      /* a & b are pointers to adjacent list elements, (pointers to list) */
      const list *sa = (const list *)a,
                 *sb = (const list *)b;
      
      const char *na = sa->name,    /* pointers to name in each element */
                 *nb = sb->name;
      
      /* loop advancing a character in each word per-iteration */
      for (;; na++, nb++) {
        /* if characters differ or at end of either */
        if (*na != *nb || !*na)
          break;
      }
      
      return (*na > *nb) - (*na < *nb);   /* return sort order */
    }
    

    Then to sort your array of list (your s[] array) with qsort(), all that is needed is:

      qsort (s, count, sizeof *s, compare);   /* sort array by name */
    

    Putting it all together in a short program that reads from the filename given as the first argument to the program (or from "rec.text" by default if no argument is given), you can do:

    #include <stdio.h>
    #include <stdlib.h>
    
    #define MXSTUD 30       /* if you need a constant, #define one (or more) */
    #define MXNAME 40
    
    typedef struct list {   /* adding typedef for convenience */
      char name[MXNAME];
      unsigned studID;
    } list;
    
    /* qsort compare function lexagraphically sorts words */
    int compare (const void *a, const void *b)
    {
      /* a & b are pointers to adjacent list elements, (pointers to list) */
      const list *sa = (const list *)a,
                 *sb = (const list *)b;
      
      const char *na = sa->name,    /* pointers to name in each element */
                 *nb = sb->name;
      
      /* loop advancing a character in each word per-iteration */
      for (;; na++, nb++) {
        /* if characters differ or at end of either */
        if (*na != *nb || !*na)
          break;
      }
      
      return (*na > *nb) - (*na < *nb);   /* return sort order */
    }
    
    int main (int argc, char **argv) {
      
      int count = 0;                      /* count of students */
      char buf[MXNAME * 2];               /* temprorary storage for line */
      list s[MXSTUD] = {{ .name = "" }};  /* list array initialized all 0 */
      /* open filename given as 1st argument or "rec.text" if none given */
      FILE *fptr = fopen (argc > 1 ? argv[1] : "rec.text", "r");
      
      if (!fptr) {  /* validate file open for reading */
        fputs ("error: file open failed\n", stderr);
        return 1;
      }
      
      while (fgets (buf, sizeof buf, fptr)) { /* read each line into buf */
        /* separate studID and name using sscanf() */
        if (sscanf (buf, "%u:%39[^\n]", &s[count].studID, s[count].name) == 2) {
          count += 1;   /* increment count on success */
        }
      }
      
      qsort (s, count, sizeof *s, compare);   /* sort array by name */
      
      for (int i = 0; i < count; i++) {   /* output results */
        printf ("%2d  %10u  %s\n", i + 1, s[i].studID, s[i].name);
      }
    }
    

    (note: you simply need to open the file in read mode "r")

    Example Use/Output

    With your data in a file named dat/studIDlist.txt, for the 27 students in your data you would get:

    $ ./bin/studIDlist dat/studIDlist.txt
     1  1191302417  ALMARHOON, ALI HUSSAIN A
     2  1191103119  ANG QIZHENG
     3  1201100603  AVINASH MURALI
     4  1181101858  CHEAH KOK YEW
     5  1171302286  DARWIN KUMAR A/L MUNIAN
     6  1191102443  FAIZA OSAMA ABDALLA HASHIM
     7  1191103071  GAN WEI TONG
     8  1181101192  HAIZUN NAJWA BINTI MOHD RIFIN
     9  1201100225  HEMAN RAO A/L SUBRAMANIAM
    10  1201100584  HI CHIA LING
    11  1171203258  HOSSAIN, MARUF
    12  1201100301  KEVIN THAM ZHENG YIT
    13  1181202660  KUHAN RAJ A/L TAMIL CHEL WAM
    14  1201302289  LEE JIA WEI
    15  1201100648  LIM CHER AIK
    16  1181100823  LIM ZHEN BANG
    17  1201100879  MOSTAFA ARABY MADBOULY AHMED
    18  1201101509  NG CHEA YEAT
    19  1201100926  NG XUE NIE
    20  1191103201  PHUAH CHEE HAOU
    21  1181203465  PONG KAI SUN
    22  1201302368  SHEIKH, AHNAF AZMAIN
    23  1201302222  SHIVAA RUTRAN A/L NAGATHEESAN
    24  1161202587  SOHEIL PRAKASAN SUPPAN
    25  1201100779  TAN WEI XIANG
    26  1191103215  TONG JUN YANG
    27  1191100919  WONG HONG WEI