Search code examples
carrayscharwords

How would I alphabetize the strings in my character array?


I've written a program to count the occurrences of letters and words in a string that the user enters. I have successfully gotten most of it to work now; however, I must also alphabetize the words I have stored in my array of pointers. I saw a function void sort_string() that was supposed to do just this, but it doesn't seem to work at all. How would I go about it?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>


void findLetters(char *ptr);
void findWords(char *point);
void sort_string(char *p);

/*
 * Prompt for one line of text, then hand it to findLetters, findWords and
 * sort_string in that order. Prints "fgets failed" when no line can be read.
 */
int main()
{
    char textStream[100]; // room for up to 98 characters plus '\n' and '\0'

    printf("enter some text\n");
    if (fgets(textStream, sizeof textStream, stdin) == NULL) // reads at most 99 characters
    {
        printf("fgets failed\n");
        return 0;
    }

    findLetters(textStream);
    findWords(textStream);
    sort_string(textStream);

    return 0;
}

/*
 * Tally the letters 'A'..'Z' and 'a'..'z' found in ptr, then print one
 * "<letter> : <count>" line for every letter seen at least once: the
 * uppercase table first, the lowercase table second. ptr is only read.
 */
void findLetters(char *ptr)
{
    int upper[26] = {0};    // upper[0] counts 'A', upper[25] counts 'Z'
    int lower[26] = {0};    // lower[0] counts 'a', lower[25] counts 'z'
    const char *cur;
    int slot;

    // walk the string up to, but not including, the terminating '\0'
    for (cur = ptr; *cur != '\0'; cur++)
    {
        const char ch = *cur;

        if (ch >= 'A' && ch <= 'Z')
        {
            upper[ch - 'A']++;  // distance from 'A' is the slot 0-25
        }
        else if (ch >= 'a' && ch <= 'z')
        {
            lower[ch - 'a']++;  // distance from 'a' is the slot 0-25
        }
    }

    printf("Number of Occurrences of Uppercase letters\n\n");
    for (slot = 0; slot < 26; slot++)
    {
        if (upper[slot] <= 0)
        {
            continue;           // letter never seen: print nothing for it
        }
        // adding the slot to 'A' turns it back into the letter
        printf("%c : \t%d\n", (char)('A' + slot), upper[slot]);
    }
    printf("\n");

    printf("Number of Occurrences of Lowercase letters\n\n");
    for (slot = 0; slot < 26; slot++)
    {
        if (lower[slot] <= 0)
        {
            continue;
        }
        // adding the slot to 'a' turns it back into the letter
        printf("%c : \t%d\n", (char)('a' + slot), lower[slot]);
    }
    printf("\n");
}

/*
 * Split a copy of point into words on the characters in delim, store a
 * heap copy of each word in word[] and print each word on its own line,
 * then free the stored word copies. point itself is not written to.
 */
void findWords(char *point)
{
    int i = 0;
    int k = 0;
    int count = 0;      // NOTE(review): never used after this line
    int j = 0;          // NOTE(review): never used after this line
    int space = 0;      // number of ' ', ',' and '.' characters found in point
    int c = 0;          // NOTE(review): never used after this line
    int len = strlen(point);
    char copy[50][100]; // NOTE(review): never used after this line
    // NOTE(review): '\n' is not in delim, so a newline left in point by
    // fgets is not treated as a separator
    char* delim = "{ } . , ( ) ";
    char **word;        // array of pointers, one per stored word
    char *newpoint;     // private copy of point; strtok writes into it
    char *newerpoint;   // second copy of point; NOTE(review): never read or freed
    char *token;
    int occur[50]; // intended to store the number of occurrences of each word; only zeroed below

    for (; i < 50; i++) // sets every element of occur to 0
    {
        occur[i] = 0;
    }

    for (i = 0; i < len; i++) // counts the ' ', ',' and '.' characters in point
    {
        if ((point[i] == ' ') || (point[i] == ',') || (point[i] == '.'))
        {
            space++;
        }
    }
    word = malloc(sizeof(char*)*(space+1)); // room for space + 1 word pointers
    newpoint = malloc(strlen(point)+1);
    strcpy(newpoint, point);
    newerpoint = malloc(strlen(point) + 1);
    strcpy(newerpoint, point);
    token = strtok(newpoint, delim);

    // store and print at most space + 1 tokens
    for (k; k <= space && token != NULL; k++)
    {
        word[k] = malloc(strlen(token) + 1);
        strcpy(word[k], token);
        token = strtok(NULL, delim);
        printf("%s\n", word[k]);
    }

    // NOTE(review): this always visits space + 1 entries, even when the
    // loop above stopped early on token == NULL and left the remaining
    // word[k] unassigned. word and newpoint themselves are not freed.
    for (k = 0; k <= space; k++)
    {
        free(word[k]);
    }
}

/*
 * For each letter 'a'..'z' in turn, scan p and append matching entries to
 * a temporary buffer, then copy that buffer back over p.
 */
void sort_string(char *p)
    {
        int c, d = 0, length;   // d is the next free position in result
        char *pointer, *result, ch;

        length = strlen(p);
        result = (char*)malloc(length + 1); // NOTE(review): not checked for NULL

        pointer = p;

        for (ch = 'a'; ch <= 'z'; ch++)
        {
            for (c = 0; c < length; c++)
            {
                // NOTE(review): this compares the pointer itself with ch,
                // not the character it points to (*pointer)
                if (pointer == ch)
                {
                    *(result + d) = *pointer;
                    d++;
                }
                pointer++;
            }
            pointer = p;    // rewind to the start of p for the next letter
        }
        *(result + d) = '\0';

        strcpy(p, result);
        free(result);
    }

EDITED VERSION:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

void findLetters(char *ptr);
void findWords(char *point);
int compare_str (const void *a, const void *b);


/*
 * Read one line from stdin, strip its trailing newline if present, then
 * report its letter counts (findLetters) and its words (findWords).
 * Prints "fgets failed" when no line can be read.
 */
int main (void)
{
    char textStream[100] = {0};     // up to 98 characters plus '\n' and '\0'
    size_t len = 0;                 // size_t comes from the standard headers

    printf ("enter some text\n");
    if (fgets (textStream, sizeof textStream, stdin)) // reads at most 99 characters
    {
        len = strlen (textStream);
        // Strip the newline only when fgets actually stored one: an empty
        // string would make --len wrap around, and a line cut short by the
        // buffer size or by EOF would otherwise lose a real character.
        if (len > 0 && textStream[len - 1] == '\n')
            textStream[--len] = 0;

        findLetters (textStream);
        findWords (textStream);
    }
    else
    {
        printf("fgets failed\n");
    }

    return 0;
}

/*
 * Tally the letters 'A'..'Z' and 'a'..'z' found in ptr, then print one
 * "<letter> : <count>" line for every letter seen at least once: the
 * uppercase table first, the lowercase table second. ptr is only read.
 */
void findLetters(char *ptr)
{
    int upper[26] = {0};    // upper[0] counts 'A', upper[25] counts 'Z'
    int lower[26] = {0};    // lower[0] counts 'a', lower[25] counts 'z'
    const char *cur;
    int slot;

    // walk the string up to, but not including, the terminating '\0'
    for (cur = ptr; *cur != '\0'; cur++)
    {
        const char ch = *cur;

        if (ch >= 'A' && ch <= 'Z')
        {
            upper[ch - 'A']++;  // distance from 'A' is the slot 0-25
        }
        else if (ch >= 'a' && ch <= 'z')
        {
            lower[ch - 'a']++;  // distance from 'a' is the slot 0-25
        }
    }

    printf("Number of Occurrences of Uppercase letters\n\n");
    for (slot = 0; slot < 26; slot++)
    {
        if (upper[slot] <= 0)
        {
            continue;           // letter never seen: print nothing for it
        }
        // adding the slot to 'A' turns it back into the letter
        printf("%c : \t%d\n", (char)('A' + slot), upper[slot]);
    }
    printf("\n");

    printf("Number of Occurrences of Lowercase letters\n\n");
    for (slot = 0; slot < 26; slot++)
    {
        if (lower[slot] <= 0)
        {
            continue;
        }
        // adding the slot to 'a' turns it back into the letter
        printf("%c : \t%d\n", (char)('a' + slot), lower[slot]);
    }
    printf("\n");
}

/*
 * Split a copy of point into words on the characters in delim, keep a heap
 * copy of every word in word[], print the words in input order, sort the
 * pointers with qsort/compare_str and print them again in sorted order.
 * point itself is not modified, and everything allocated here is released
 * before returning. On allocation failure a message is written to stderr.
 */
void findWords(char *point)
{
    const char *delim = "\n { } . , ( ) ";
    size_t len = strlen (point);
    size_t separators = 0;
    size_t count = 0;           /* number of strings stored in word */
    size_t k;
    char **word = NULL;
    char *newpoint = NULL;
    char *token = NULL;

    /* Every word after the first is preceded by at least one delimiter
       character, so separators + 1 is an upper bound on the word count.
       All characters of delim are counted, not only ' ', ',' and '.'. */
    for (k = 0; k < len; k++)
        if (strchr (delim, point[k]) != NULL)
            separators++;

    /* the parentheses matter: (separators + 1) pointers, not
       separators pointers plus one byte */
    word = malloc ((separators + 1) * sizeof *word);
    newpoint = malloc (len + 1);    /* strtok writes into its input, so work on a copy */
    if (word == NULL || newpoint == NULL)
    {
        fprintf (stderr, "findWords: out of memory\n");
        free (newpoint);
        free (word);
        return;
    }
    strcpy (newpoint, point);

    printf ("\nSeparating and saving words in pointer array:\n\n");
    for (token = strtok (newpoint, delim); token != NULL; token = strtok (NULL, delim))
    {
        word[count] = malloc (strlen (token) + 1);
        if (word[count] == NULL)
        {
            /* stop collecting; the words gathered so far are still handled */
            fprintf (stderr, "findWords: out of memory\n");
            break;
        }
        strcpy (word[count], token);
        printf ("%s\n", word[count]);
        count++;
    }

    qsort (word, count, sizeof *word, compare_str);     /* sort the array of pointers */

    printf ("\nSorted words in pointer array:\n\n");
    for (k = 0; k < count; k++)
        printf ("%s\n", word[k]);

    for (k = 0; k < count; k++)
        free (word[k]);
    free (word);
    free (newpoint);
}


/*
 * qsort comparison callback for an array of C-string pointers.
 * a and b each point at one array element (a char *), so each is
 * dereferenced once to reach the string; the strings are ordered by strcmp.
 * Returns <0, 0 or >0 when *a sorts before, equal to or after *b.
 */
int compare_str (const void *a, const void *b)
{
    /* const char *const * keeps the const that qsort's const void * promises */
    const char *const *ia = a;
    const char *const *ib = b;

    return strcmp (*ia, *ib);
}

Solution

  • When you do get it working, I think you are in for a surprise. The sort_string function has nothing whatsoever to do with sorting words in an array of pointers; instead, it sorts the characters in a character array. For example:

    $ ./bin/str_sort_words
    
    Enter a string: a quick brown fox jumps over the lazy dog
      sorted words: aabcdeefghijklmnoooopqrrstuuvwxyz
    

    Instead you need to sort the array of pointers with qsort so that you are actually sorting words instead of characters. To use qsort you pass it the array, the number of elements and the size of each element, and you must provide a compare function so that it knows how to order any two elements. For an array of pointers, a qsort compare function looks like this:

    /*
     * qsort C-string comparison function.
     * a and b each point at one array element (a char *), so each is
     * dereferenced once to reach the string; the strings are ordered by strcmp.
     * Returns <0, 0 or >0 when *a sorts before, equal to or after *b.
     */
    int compare_str (const void *a, const void *b)
    {
        /* const char *const * keeps the const that qsort's const void * promises */
        const char *const *ia = a;
        const char *const *ib = b;

        return strcmp (*ia, *ib);
    }
    

    Then in your case, you must call it in the findWords function where the pointer array word lives as follows:

    qsort (word, count, sizeof *word, compare_str);
    

    (where count is your space + 1 equivalent). Now, before we look at the answer, you need to enable warnings when you compile (your code does not compile cleanly because of the pointer/int mismatch in if (pointer == ch), which should be if (*pointer == ch)).

    But beyond that, compiling with warnings enabled would have pointed out a whole list of problems in findWords. To enable warnings add -Wall -Wextra to your compile string.

    Now let's look at changes to your code. Always initialize All variables:

    char textStream[100] = {0};     // up to 98 characters plus '\n' and '\0'
    size_t len = 0;
    

    Next, when using fgets or getline, it is a good idea to strip the trailing newline so it isn't dangling off your strings:

    if (fgets (textStream, sizeof textStream, stdin)) // reads at most 99 characters
    {
        len = strlen (textStream);
        // strip the newline only when fgets stored one: an empty string
        // would make --len wrap around, and a line cut short by the buffer
        // size or by EOF would otherwise lose a real character
        if (len > 0 && textStream[len - 1] == '\n')
            textStream[--len] = 0;
        ...
    

    Now, changes to findWords:

    /*
     * Split a copy of point into words on the characters in delim, keep a
     * heap copy of every word in word[], print the words in input order,
     * sort the pointers with qsort/compare_str and print them again.
     * point itself is not modified, and everything allocated here is
     * released before returning. On allocation failure a message is written
     * to stderr. The commented-out lines are the unused declarations from
     * the question's version.
     */
    void findWords(char *point)
    {
        int i = 0;
        int k = 0;
        int count = 0;
        // int j = 0;
        int space = 0;
        // int c = 0;
        int len = strlen(point);
        // char copy[50][100];
        char* delim = "{ } . , ( ) ";
        char **word = NULL;
        char *newpoint = NULL;
        char *newerpoint = NULL;
        char *token = NULL;
    //     int occur[50]; // will store number of occurrences of each word
    //
    //     for (; i < 50; i++) //sets all indexes to 0
    //     {
    //         occur[i] = 0;
    //     }


        // Count every delimiter character. Each word after the first is
        // preceded by at least one of them, so space + 1 bounds the word count.
        for (i = 0; i < len; i++)
        {
            if (strchr (delim, point[i]) != NULL)
            {
                space++;
            }
        }
        // the parentheses matter: (space + 1) pointers, not space pointers plus one byte
        word = malloc (sizeof *word * (space + 1));
        newpoint = malloc (strlen (point) + 1);
        newerpoint = malloc (strlen (point) + 1);
        if (word == NULL || newpoint == NULL || newerpoint == NULL)
        {
            fprintf (stderr, "findWords: out of memory\n");
            free (newerpoint);
            free (newpoint);
            free (word);
            return;
        }
        strcpy (newpoint, point);       // strtok writes into its input, so work on a copy
        strcpy (newerpoint, point);     // second copy kept from the question; not read below
        token = strtok (newpoint, delim);

        printf ("\nSeparating and saving words in pointer array:\n\n");
        for (k = 0; token != NULL; k++)
        {
            word[k] = malloc (strlen (token) + 1);
            if (word[k] == NULL)
            {
                // stop collecting; the k words gathered so far are still handled
                fprintf (stderr, "findWords: out of memory\n");
                break;
            }
            strcpy (word[k], token);
            token = strtok (NULL, delim);
            printf ("%s\n", word[k]);
        }

        count = k;  /* save number of string in word */

        qsort (word, count, sizeof *word, compare_str);     /* sort the array of pointers */

        printf ("\nSorted words in pointer array:\n\n");
        for (k = 0; k < count; k++)
            printf ("%s\n", word[k]);

        for (k = 0; k < count; k++)
        {
            free(word[k]);
        }
        free (word);
        free (newerpoint);
        free (newpoint);
    }
    

    When you compile, there should be NO warnings:

    gcc -Wall -Wextra -o yourprog yourfile.c
    

    Example Use/Output

    $ ./bin/str_find_ltr_words
    enter some text
    the quick brown fox jumped over a lazy dog
    Number of Occurrences of Uppercase letters
    
    
    Number of Occurrences of Lowercase letters
    
    a :     2
    b :     1
    c :     1
    d :     2
    e :     3
    f :     1
    g :     1
    h :     1
    i :     1
    j :     1
    k :     1
    l :     1
    m :     1
    n :     1
    o :     4
    p :     1
    q :     1
    r :     2
    t :     1
    u :     2
    v :     1
    w :     1
    x :     1
    y :     1
    z :     1
    
    
    Separating and saving words in pointer array:
    
    the
    quick
    brown
    fox
    jumped
    over
    a
    lazy
    dog
    
    
    Sorted words in pointer array:
    
    a
    brown
    dog
    fox
    jumped
    lazy
    over
    quick
    the
    

    As a final point, there is a lot of irrelevant code in your findWords function. I didn't take the time to strip it out because it wasn't preventing your words from being sorted. Your code with the modifications above compiles without warning, so I'll leave it up to you to go through findWords and get rid of what isn't needed.


    Full Example

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    void findLetters(char *ptr);
    void findWords(char *point);
    void sort_string(char *p);
    int compare_str (const void *a, const void *b);
    
    
    /*
     * Read one line from stdin, strip its trailing newline if present, then
     * pass it to findLetters, findWords and sort_string in that order.
     * Prints "fgets failed" when no line can be read.
     */
    int main (void)
    {
        char textStream[100] = {0};     // up to 98 characters plus '\n' and '\0'
        size_t len = 0;

        printf ("enter some text\n");
        if (fgets (textStream, sizeof textStream, stdin)) // reads at most 99 characters
        {
            len = strlen (textStream);
            // Strip the newline only when fgets actually stored one: an
            // empty string would make --len wrap around, and a line cut
            // short by the buffer size or by EOF would otherwise lose a
            // real character.
            if (len > 0 && textStream[len - 1] == '\n')
                textStream[--len] = 0;

            findLetters (textStream);
            findWords (textStream);
            sort_string (textStream);   // rewrites textStream in place; the result is not printed
        }
        else
        {
            printf("fgets failed\n");
        }

        return 0;
    }
    
    /*
     * Tally the letters 'A'..'Z' and 'a'..'z' found in ptr, then print one
     * "<letter> : <count>" line for every letter seen at least once: the
     * uppercase table first, the lowercase table second. ptr is only read.
     */
    void findLetters(char *ptr)
    {
        int upper[26] = {0};    // upper[0] counts 'A', upper[25] counts 'Z'
        int lower[26] = {0};    // lower[0] counts 'a', lower[25] counts 'z'
        const char *cur;
        int slot;

        // walk the string up to, but not including, the terminating '\0'
        for (cur = ptr; *cur != '\0'; cur++)
        {
            const char ch = *cur;

            if (ch >= 'A' && ch <= 'Z')
            {
                upper[ch - 'A']++;  // distance from 'A' is the slot 0-25
            }
            else if (ch >= 'a' && ch <= 'z')
            {
                lower[ch - 'a']++;  // distance from 'a' is the slot 0-25
            }
        }

        printf("Number of Occurrences of Uppercase letters\n\n");
        for (slot = 0; slot < 26; slot++)
        {
            if (upper[slot] <= 0)
            {
                continue;           // letter never seen: print nothing for it
            }
            // adding the slot to 'A' turns it back into the letter
            printf("%c : \t%d\n", (char)('A' + slot), upper[slot]);
        }
        printf("\n");

        printf("Number of Occurrences of Lowercase letters\n\n");
        for (slot = 0; slot < 26; slot++)
        {
            if (lower[slot] <= 0)
            {
                continue;
            }
            // adding the slot to 'a' turns it back into the letter
            printf("%c : \t%d\n", (char)('a' + slot), lower[slot]);
        }
        printf("\n");
    }
    
    /*
     * Split a copy of point into words on the characters in delim, keep a
     * heap copy of every word in word[], print the words in input order,
     * sort the pointers with qsort/compare_str and print them again in
     * sorted order. point itself is not modified, and everything allocated
     * here is released before returning. On allocation failure a message is
     * written to stderr.
     */
    void findWords(char *point)
    {
        const char *delim = "{ } . , ( ) \n";
        size_t len = strlen (point);
        size_t separators = 0;
        size_t count = 0;           /* number of strings stored in word */
        size_t k;
        char **word = NULL;
        char *newpoint = NULL;
        char *token = NULL;

        /* Every word after the first is preceded by at least one delimiter
           character, so separators + 1 is an upper bound on the word count.
           All characters of delim are counted, not only ' ', ',' and '.'. */
        for (k = 0; k < len; k++)
            if (strchr (delim, point[k]) != NULL)
                separators++;

        /* the parentheses matter: (separators + 1) pointers, not
           separators pointers plus one byte */
        word = malloc ((separators + 1) * sizeof *word);
        newpoint = malloc (len + 1);    /* strtok writes into its input, so work on a copy */
        if (word == NULL || newpoint == NULL)
        {
            fprintf (stderr, "findWords: out of memory\n");
            free (newpoint);
            free (word);
            return;
        }
        strcpy (newpoint, point);

        printf ("\nSeparating and saving words in pointer array:\n\n");
        for (token = strtok (newpoint, delim); token != NULL; token = strtok (NULL, delim))
        {
            word[count] = malloc (strlen (token) + 1);
            if (word[count] == NULL)
            {
                /* stop collecting; the words gathered so far are still handled */
                fprintf (stderr, "findWords: out of memory\n");
                break;
            }
            strcpy (word[count], token);
            printf ("%s\n", word[count]);
            count++;
        }

        qsort (word, count, sizeof *word, compare_str);     /* sort the array of pointers */

        printf ("\nSorted words in pointer array:\n\n");
        for (k = 0; k < count; k++)
            printf ("%s\n", word[k]);

        for (k = 0; k < count; k++)
            free (word[k]);
        free (word);
        free (newpoint);
    }
    
    /*
     * Replace the contents of p with its lowercase letters 'a'..'z' arranged
     * in alphabetical order. Every other character (uppercase letters,
     * digits, spaces, punctuation) is dropped, so the result is never longer
     * than the input and can be written back in place without a temporary
     * buffer.
     */
    void sort_string(char *p)
    {
        size_t counts[26] = {0};    // counts[0] is the number of 'a's, counts[25] of 'z's
        const char *in;
        char *out = p;
        int letter;

        // first pass: tally the lowercase letters
        for (in = p; *in != '\0'; in++)
        {
            if (*in >= 'a' && *in <= 'z')
            {
                counts[*in - 'a']++;
            }
        }

        // second pass: emit each letter as many times as it occurred;
        // safe to overwrite p because the tally above is already complete
        for (letter = 0; letter < 26; letter++)
        {
            size_t remaining;

            for (remaining = counts[letter]; remaining > 0; remaining--)
            {
                *out++ = (char)('a' + letter);
            }
        }
        *out = '\0';
    }
    
    /*
     * qsort C-string comparison function.
     * a and b each point at one array element (a char *), so each is
     * dereferenced once to reach the string; the strings are ordered by strcmp.
     * Returns <0, 0 or >0 when *a sorts before, equal to or after *b.
     */
    int compare_str (const void *a, const void *b)
    {
        /* const char *const * keeps the const that qsort's const void * promises */
        const char *const *ia = a;
        const char *const *ib = b;

        return strcmp (*ia, *ib);
    }
    

    Why you had an extra newline

    As mentioned in the comments and in the answer, it is always a good idea to strip the trailing '\n' from your strings when you are reading with fgets or getline. It will cause your character count in len to be off by 1 and then for your space count to be off by 1. Now that in itself isn't a crime, it just causes you to allocate one more pointer than needed. However it has consequences due to your definition of:

    char* delim = "{ } . , ( ) ";
    

    Since you do not specify '\n' as a delimiter, strtok happily considers it as a separate word. This causes problems with your sort, as you now have a blank line appearing in your array of words. So you can either strip the newline after you read textStream or add '\n' to delim (or, preferably, both).

    char* delim = "{ } . , ( ) \n";
    

    Case Insensitive Sorting

    strcmp compares character codes, so every uppercase letter sorts ahead of every lowercase one. To sort without regard to case, use strcasecmp (declared in <strings.h>) instead of strcmp. So above, just change the qsort comparison to:

    /*
     * Case-insensitive qsort comparison function for an array of C-string
     * pointers. a and b each point at one array element (a char *), so each
     * is dereferenced once to reach the string; the strings are ordered by
     * strcasecmp, which is declared in <strings.h>.
     */
    int compare_str (const void *a, const void *b)
    {
        /* const char *const * keeps the const that qsort's const void * promises */
        const char *const *ia = a;
        const char *const *ib = b;

        return strcasecmp (*ia, *ib);
    }
    

    strcasecmp on Windows

    strcasecmp is not part of standard C; it is a POSIX function declared in <strings.h>, available on Linux but not on Windows. To get the same functionality on Windows, use _stricmp (formerly stricmp) instead. See (MSDN stricmp).