Search code examples
c fgets strcmp

Using fgets() together with strcmp(): strcmp() is not comparing properly


My current task is to code a function that's given a FILE pointer and a pointer to a string. The function should count how many times the string occurs in the given file and return the count as a whole number. It also needs to be case-sensitive. In my current program, I use the word "dog" as the string to be found in the file. But it keeps giving me 0 even though the .txt file has the word dog three times in it. This is my first post here; I checked the other posts about this topic, but they didn't fix my problem.

This is what I tried:

#include <stdio.h>
#include <string.h>

/*
 * Count the lines of fp whose entire content equals searchWord.
 *
 * fp          stream opened for reading; consumed up to end-of-file.
 * searchWord  string to compare against (case-sensitive, via strcmp).
 *
 * Returns the number of matching lines.
 *
 * Note: the comparison is made against the whole line, not against the
 * individual words in it, so a line such as "dog dog" does not match "dog".
 * Lines longer than 4095 characters are read and compared in pieces.
 */
int searchAndCount(FILE *fp, const char *searchWord) {
    int count = 0;
    char buffer[4096];

    while (fgets(buffer, sizeof(buffer), fp) != NULL) {
        /*
         * Cut the line at its newline, if there is one. strcspn() returns
         * the index of the terminating '\0' when no newline is present, so
         * a final line without a trailing '\n' keeps its last character and
         * an empty buffer is never indexed at -1.
         */
        buffer[strcspn(buffer, "\n")] = '\0';
        if (strcmp(buffer, searchWord) == 0) {
            count++;
        }
    }
    return count;
}

/*
 * Open test.txt, count how often "dog" is reported by searchAndCount(),
 * and print the result. Exits with status 1 if the file cannot be opened.
 */
int main() {
    const char *word = "dog";
    FILE *input = fopen("test.txt", "r");

    if (!input) {
        perror("File couldn't open properly");
        return 1;
    }

    int occurrences = searchAndCount(input, word);
    printf("The word 'dog' occurs %d-times in the file\n", occurrences);

    fclose(input);

    return 0;
}

my test.txt looks like this:

dog dog dogoggo dog.

and I get this:

The word 'dog' occurs 0-times in the file

Edit: Judging from the comments, it seems I need to use strtok(). I will research this function.

But using:

/*
 * Count the whitespace-delimited tokens in fp that are exactly equal
 * (case-sensitive) to searchWord. Punctuation attached to a token is part
 * of the token, so "dog." is not counted as "dog".
 */
int searchAndCount(FILE *fp, const char *searchWord) {
    char token[4096];
    int matches = 0;

    for (;;) {
        /* The 4095 field width leaves one byte for the terminating '\0'. */
        if (fscanf(fp, "%4095s", token) != 1) {
            break;
        }
        if (!strcmp(token, searchWord)) {
            ++matches;
        }
    }
    return matches;
}

more or less solved the problem, except that "dog." does not get counted because of the dot.

Solution:

/*
 * Count the words in fp that are exactly equal (case-sensitive) to
 * searchWord. A word is a run of characters accepted by the [a-zA-Z]
 * scanset, read at most 4095 characters at a time; everything else acts
 * as a separator.
 */
int searchAndCount(FILE *fp, const char *searchWord) {
    char word[4096];
    int matches = 0;
    int gotWord;

    do {
        /*
         * Discard the separators in front of the next word. The result is
         * ignored on purpose: there may be no separators to skip.
         */
        fscanf(fp, "%*[^a-zA-Z]");
        gotWord = (fscanf(fp, "%4095[a-zA-Z]", word) == 1);
        if (gotWord && strcmp(word, searchWord) == 0) {
            matches++;
        }
    } while (gotWord);

    return matches;
}

Solution

  • Since you are matching words and not complete lines, you should use fscanf instead of fgets:

    #include <stdio.h>
    #include <string.h>
    
    // Count the whitespace-delimited tokens in fp that are exactly equal
    // (case-sensitive) to searchWord. Punctuation attached to a token is
    // part of the token.
    int searchAndCount(FILE *fp, const char *searchWord) {
        char token[4096];
        int matches = 0;

        // the 4095 field width caps each token so that the null
        // terminator still fits into the 4096-byte buffer
        int status = fscanf(fp, "%4095s", token);
        while (status == 1) {
            matches += (strcmp(token, searchWord) == 0);
            status = fscanf(fp, "%4095s", token);
        }
        return matches;
    }
    
    // Open test.txt, count how often "dog" is reported by searchAndCount(),
    // and print the result. Exits with status 1 if the file cannot be opened.
    int main(void) {
        const char *word = "dog";
        FILE *input = fopen("test.txt", "r");

        if (!input) {
            perror("File couldn't open properly");
            return 1;
        }

        int occurrences = searchAndCount(input, word);
        printf("The word 'dog' occurs %d times in the file\n", occurrences);

        fclose(input);

        return 0;
    }
    

    If you want to ignore punctuation, here is a more elaborate version:

    // Count the words in fp that are exactly equal (case-sensitive) to
    // searchWord. A word is a run of characters accepted by the [a-zA-Z]
    // scanset, read at most 4095 characters at a time; anything else is
    // treated as a separator.
    int searchAndCount(FILE *fp, const char *searchWord) {
        char word[4096];
        int hits = 0;

        // drop any separators in front of the first word
        fscanf(fp, "%*[^a-zA-Z]");
        // keep going for as long as another word can be read
        while (fscanf(fp, "%4095[a-zA-Z]", word) == 1) {
            // strcmp() yields 0 on equality, so the comparison adds 1 or 0
            hits += (strcmp(word, searchWord) == 0);
            // drop the separators that follow this word
            fscanf(fp, "%*[^a-zA-Z]");
        }
        return hits;
    }