Search code examples
cstringsortingstructletter

Sorting letters in an array by frequency in within a struct


I am trying to sort letters by frequency in a string. In the case that two or more letters have the same frequency, the letters with the same frequency would be sorted in alphabetical order.

This is what I managed to get so far

void get_text_statistics(const char *text, size_t len, statistics_t *data)
{
    *data = (statistics_t)
        {
            .sentences          = count_sentences(text, len),
            .words              = count_words(text, len),
            .most_freq_chars    = {/*something needs to be here*/}
        }

        get_letter_frequencies(text, len, &data -> freq[0], &data -> max_freq)
}

As you can see, my problem here is trying to sort the letters in a string by freq. I've tried looking up some tutorials but I couldn't find something similar to this specific example. Here is the struct concerned.

typedef struct statistics
{
    char_counts_t char_info;
    int sentences;
    int words;
    int freq[26];
    int max_freq;
    char most_freq_chars[27];
} statistics_t;

A bit earlier, I managed to make this function which may help.

void get_letter_frequencies(const char *text, size_t len, int freq[26], int *max_freq)
{

    for (int i = 0; i < 26; i++)
        freq[i] = 0;

    for (int i = 0; i < len; i++) {
        if ((text[i] >= 97) && (text[i] <= 122))
            freq[text[i] - 97]++;

    *max_freq = 0;
    for (int i = 0; i < 26; i++)
        if (*max_freq < freq[i])
            *max_freq = freq[i];
}

How would I go about this? TIA

p.s: count_sentences and count_words are functions which count sentences and words in the string.


Solution

  • One solution I thought of creates a structure that holds both the character and the frequency and did sort using qsort with a custom comparison function. This way the frequency limit is INT_MAX.

    A more hacky way would be to sort an array of integers and for each poison use freq*128 + ('a' + i), do a normal integer array sort using greater and then get the characters using most_freq_chars = freq_array[i]%128

    I hope it helps you =]

    #include <stdio.h>      /* printf */
    #include <stdlib.h>     /* qsort */
    #include <string.h>     /* strlen */
    
    typedef struct freq_pair {
      char c;
      int freq;
    } freq_pair_t;
    
    typedef struct statistics {
      char_counts_t char_info;
      int sentences;
      int words;
      int freq[26];
      int max_freq;
      char most_freq_chars[26]; // You don't need 27 spaces here
    } statistics_t;
    
    void get_letter_frequencies(const char* text, size_t len, int freq[26], int* max_freq) {
      for (int i = 0; i < 26; i++) {
        freq[i] = 0;
      }
    
      for (int i = 0; i < len; i++) {
        if ((text[i] >= 'a') && (text[i] <= 'z')) {
          freq[text[i] - 'a']++;
        }
      }
    
      *max_freq = 0;
      for (int i = 0; i < 26; i++) {
        if (*max_freq < freq[i]) {
          *max_freq = freq[i];
        }
      }
    }
    
    int compare(const void* a, const void* b) {
      freq_pair_t* pa = (freq_pair_t*)a;
      freq_pair_t* pb = (freq_pair_t*)b;
    
      if (pa->freq > pb->freq) {
        return -1;
      }
    
      if (pa->freq == pb->freq) {
        if (pa->c < pb->c) {
          return -1;
        }
        if (pa->c > pb->c) {
          return 1;
        }
    
        return 0;
      }
    
      if (pa->freq < pb->freq) {
        return 1;
      }
    }
    
    void get_text_statistics(const char* text, size_t len, statistics_t* data) {
      *data = (statistics_t){
          .sentences = count_sentences(text, len),
          .words = count_words(text, len),
          /* Do not init most_freq_chars here, let it for after you calc all the frequencies */
      };
    
      get_letter_frequencies(text, len, &data->freq[0], &data->max_freq);
      freq_pair_t freq_pairs[26];
    
      for (int i = 0; i < 26; i++) {
        freq_pairs[i].freq = data->freq[i];
        freq_pairs[i].c = 'a' + i;
      }
    
      qsort(freq_pairs, 26, sizeof(freq_pair_t), compare);
      for (int i = 0; i < 26; i++) {
        data->most_freq_chars[i] = freq_pairs[i].c;
      }
    }
    
    int main() {
      char* s = "quero mudar o mundo, cruzar os ceus sem nada temer";
      statistics_t data;
      get_text_statistics(s, strlen(s), &data);
    
      for (int i = 0; i < 26; i++) {
        printf("%c ", data.most_freq_chars[i]);
      }
      printf("\n");
    
      for (int i = 0; i < 26; i++) {
        printf("%c-%d ", 'a' + i, data.freq[i]);
      }
      printf("\n");
    }