My current university assignment is to read words from a file and count the occurrences of each word in that file, printing the results to the console or to a new file.
I have been successful in counting the word occurrences; however, I am having difficulty removing the duplicate entries from the output.
For example, I want C to be printed only once while keeping its count of 2. My current output is:
C : 2
they : 2
are : 1
not : 1
they : 2
written : 1
in : 1
C : 2
Here is the code I have at the moment:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define SIZE 256
/* A single word paired with the number of times it has been counted. */
struct words
{
    char *word;      /* points at the characters of the word */
    unsigned count;  /* occurrences counted so far */
};
int count_words()
{
// Allocate memory for first word to compare + words struct
char *key_word = (char*)malloc(sizeof(char) * SIZE);
struct words *w = (struct words*)malloc(sizeof(struct words) * SIZE);
// Create variable to read words from file
FILE *word_list = fopen("single_words_test.txt", "r");
// Variable to store total word count
int total_words = 0;
// Read in words from file line by line
while (fgets(key_word, SIZE, word_list) != NULL)
{
// Remove the newline character
key_word[strlen(key_word) - 1] = '\0';
// Initialize members of words structure
w->word = key_word;
w->count = 0;
// Allocate memory for current word being compared to key_word
char *current_word = (char*)malloc(sizeof(char) * SIZE);
// Create variable to read second list of words from file
FILE *word_list2 = fopen("single_words_test.txt", "r");
while (fgets(current_word, SIZE, word_list2) != NULL)
{
// Remove newline character
current_word[strlen(current_word) - 1] = '\0';
// If currrent read word matches keyword, increase its count
if (strcmp(key_word, current_word) == 0)
{
w->count++;
}
}
// Free the allocated memory
free(current_word);
fclose(word_list2);
total_words++;
printf("%s : %d\n", w->word, w->count);
}
free(w);
free(key_word);
fclose(word_list);
return total_words;
}
/* Program entry point: runs the word count and prints the total it returns. */
int main(int argc, char **argv)
{
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    const int total = count_words();

    printf("\n\n\n%d\n\n\n", total);
    return 0;
}
I know the code is messy, but I have been stuck on this for some time and I am unsure how to implement this in my current solution.
Also, I know this could be done with a linked list, but I want to avoid that and keep the approach similar to my current solution.
Thank you, and sorry for the ambiguity of the question.
EDIT: This isn't a code request. I would just like some general guidance on what I could use.
Check whether the current_word was already found earlier in the list than the key_word you're looking for. You can do this by calling ftell(...) on each file pointer and comparing the positions.
Here's an example:
int count_words()
{
// Allocate memory for first word to compare + words struct
char *key_word = (char*)malloc(sizeof(char) * SIZE);
struct words *w = (struct words*)malloc(sizeof(struct words) * SIZE);
// Create variable to read words from file
FILE *word_list = fopen("single_words_test.txt", "r");
// Variable to store total word count
int total_words = 0;
long word_list_pos, word_list2_pos;
// Read in words from file line by line
while (fgets(key_word, SIZE, word_list) != NULL)
{
// Remove the newline character
key_word[strlen(key_word) - 1] = '\0';
// Initialize members of words structure
w->word = strdup(key_word);
w->count = 0;
word_list_pos = ftell(word_list);
// Allocate memory for current word being compared to key_word
char *current_word = (char*)malloc(sizeof(char) * SIZE);
// Create variable to read second list of words from file
FILE *word_list2 = fopen("single_words_test.txt", "r");
while (fgets(current_word, SIZE, word_list2) != NULL)
{
// Remove newline character
current_word[strlen(current_word) - 1] = '\0';
// If currrent read word matches keyword, increase its count
if (strcmp(key_word, current_word) == 0)
{
word_list2_pos = ftell(word_list2);
if (word_list2_pos < word_list_pos)
break;
w->count++;
}
}
total_words++;
if (word_list2_pos >= word_list_pos)
printf("%s : %d\n", w->word, w->count);
// Free the allocated memory
free(w->word);
free(current_word);
fclose(word_list2);
}
free(w);
free(key_word);
fclose(word_list);
return total_words;
}