Search code examples
csortingcounting-sort

Counting Sort displays a weird behavior


I have implemented a Counting Sort in an assignment given to us by a teacher but sometimes it doesn't work for large arrays.

Here is the code:

void countingSort(int *t, int n) {
    int min = findMin(t, n);
    int max = findMax(t, n);
    int range = max - min + 1;
    int *count, *output;
    int i;
    count = (int *)malloc(range * sizeof(int));
    output = (int *)malloc(n * sizeof(int));

    for (i = 0; i < range; i++) {
        count[i] = 0;
    }
    for (i = 0; i < n; i++) {
        count[t[i] - min]++;
    }
    for (i = 1; i < range; i++) {
        count[i] += count[i - 1];
    }
    for (i = n - 1; i >= 0; i--) {
        output[count[t[i] - min] - 1] = t[i];
        count[t[i] - min]--;
    }
    for (i = 0; i < n; i++) {
        t[i] = output[i];
    }
}

What's wrong with my code?


Solution

  • Your code seems to work for small values of range, but might fail if min and max are too far apart, causing the computation of range to overflow the range of int and malloc() to fail.

    You should check for overflow in range and check memory allocation success. Note too that calloc() is more appropriate than malloc() for the count array. Finally, you must free the allocated arrays.

    Here is a modified version:

    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>
    
    int findMax(const int *t, int n) {
        int max = INT_MIN;
        while (n-- > 0) {
            if (max < *t) max = *t;
            t++;
        }
        return max;
    }    
    
    int findMin(const int *t, int n) {
        int min = INT_MAX;
        while (n-- > 0) {
            if (min > *t) min = *t;
            t++;
        }
        return min;
    }    
    
    int countingSort(int *t, int n) {
        int min, max, range, i;
        int *count, *output;
    
        if (n <= 0)
            return 0; 
    
        min = findMin(t, n);
        max = findMax(t, n);
    
        if (min < 0 && max >= 0 && (unsigned)max + (unsigned)(-min) >= INT_MAX) {
            fprintf(stderr, "countingSort: value range too large: %d..%d\n", min, max);
            return -1;
        }
        range = max - min + 1;
        if ((count = (int *)calloc(range, sizeof(int))) == NULL) {
            fprintf(stderr, "countingSort: cannot allocate %d element count array\n", range);
            return -1;
        }
        if ((output = (int *)malloc(n * sizeof(int))) == NULL) {
            fprintf(stderr, "countingSort: cannot allocate %d element output array\n", n);
            free(count);
            return -1;
        }
        for (i = 0; i < n; i++) {
            count[t[i] - min]++;
        }
        for (i = 1; i < range; i++) {
            count[i] += count[i - 1];
        }
        for (i = n; i-- > 0;) {
            output[count[t[i] - min] - 1] = t[i];
            count[t[i] - min]--;
        }
        for (i = 0; i < n; i++) {
            t[i] = output[i];
        }
        free(count);
        free(output);
        return 0;
    }
    

    You can avoid the cumbersome and potentially inefficient downward loop by replacing the second and third for loops with this:

        /* compute the first index for each value */
        int index = 0;
        for (i = 0; i < range; i++) {
            incr = count[i];
            count[i] = index;
            index += incr;
        }
        /* copy each value at the corresponding index and update it */
        for (i = 0; i < n; i++) {
            output[count[t[i] - min]++] = t[i];
        }