I am trying to sort letters by frequency in a string. In the case that two or more letters have the same frequency, the letters with the same frequency would be sorted in alphabetical order.
This is what I managed to get so far
void get_text_statistics(const char *text, size_t len, statistics_t *data)
{
*data = (statistics_t)
{
.sentences = count_sentences(text, len),
.words = count_words(text, len),
.most_freq_chars = {/*something needs to be here*/}
}
get_letter_frequencies(text, len, &data -> freq[0], &data -> max_freq)
}
As you can see, my problem here is trying to sort the letters in a string by freq. I've tried looking up some tutorials but I couldn't find something similar to this specific example. Here is the struct concerned.
typedef struct statistics
{
char_counts_t char_info;
int sentences;
int words;
int freq[26];
int max_freq;
char most_freq_chars[27];
} statistics_t;
A bit earlier, I managed to make this function which may help.
void get_letter_frequencies(const char *text, size_t len, int freq[26], int *max_freq)
{
for (int i = 0; i < 26; i++)
freq[i] = 0;
for (int i = 0; i < len; i++) {
if ((text[i] >= 97) && (text[i] <= 122))
freq[text[i] - 97]++;
*max_freq = 0;
for (int i = 0; i < 26; i++)
if (*max_freq < freq[i])
*max_freq = freq[i];
}
How would I go about this? TIA
p.s: count_sentences
and count_words
are functions which count sentences and words in the string.
One solution I thought of creates a structure that holds both the character and the frequency and did sort using qsort
with a custom comparison function. This way the frequency limit is INT_MAX.
A more hacky way would be to sort an array of integers and for each poison use freq*128 + ('a' + i)
, do a normal integer array sort using greater
and then get the characters using most_freq_chars = freq_array[i]%128
I hope it helps you =]
#include <stdio.h> /* printf */
#include <stdlib.h> /* qsort */
#include <string.h> /* strlen */
typedef struct freq_pair {
char c;
int freq;
} freq_pair_t;
typedef struct statistics {
char_counts_t char_info;
int sentences;
int words;
int freq[26];
int max_freq;
char most_freq_chars[26]; // You don't need 27 spaces here
} statistics_t;
void get_letter_frequencies(const char* text, size_t len, int freq[26], int* max_freq) {
for (int i = 0; i < 26; i++) {
freq[i] = 0;
}
for (int i = 0; i < len; i++) {
if ((text[i] >= 'a') && (text[i] <= 'z')) {
freq[text[i] - 'a']++;
}
}
*max_freq = 0;
for (int i = 0; i < 26; i++) {
if (*max_freq < freq[i]) {
*max_freq = freq[i];
}
}
}
int compare(const void* a, const void* b) {
freq_pair_t* pa = (freq_pair_t*)a;
freq_pair_t* pb = (freq_pair_t*)b;
if (pa->freq > pb->freq) {
return -1;
}
if (pa->freq == pb->freq) {
if (pa->c < pb->c) {
return -1;
}
if (pa->c > pb->c) {
return 1;
}
return 0;
}
if (pa->freq < pb->freq) {
return 1;
}
}
void get_text_statistics(const char* text, size_t len, statistics_t* data) {
*data = (statistics_t){
.sentences = count_sentences(text, len),
.words = count_words(text, len),
/* Do not init most_freq_chars here, let it for after you calc all the frequencies */
};
get_letter_frequencies(text, len, &data->freq[0], &data->max_freq);
freq_pair_t freq_pairs[26];
for (int i = 0; i < 26; i++) {
freq_pairs[i].freq = data->freq[i];
freq_pairs[i].c = 'a' + i;
}
qsort(freq_pairs, 26, sizeof(freq_pair_t), compare);
for (int i = 0; i < 26; i++) {
data->most_freq_chars[i] = freq_pairs[i].c;
}
}
int main() {
char* s = "quero mudar o mundo, cruzar os ceus sem nada temer";
statistics_t data;
get_text_statistics(s, strlen(s), &data);
for (int i = 0; i < 26; i++) {
printf("%c ", data.most_freq_chars[i]);
}
printf("\n");
for (int i = 0; i < 26; i++) {
printf("%c-%d ", 'a' + i, data.freq[i]);
}
printf("\n");
}