Search code examples
csortingquicksortpartitioning

3 way quicksort (C implementation)


I try to implement some of the algorithms pure generic using C. I stick with the 3-way quicksort but somehow the implementation does not give correct output. The output nearly sorted but some keys aren't where it should be. The code is below. Thanks in advance.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

static void swap(void *x, void *y, size_t size) {
    void *tmp = malloc(size);

    memcpy(tmp, x, size);
    memcpy(x, y, size);
    memcpy(y, tmp, size);

    free(tmp);
}

static int cmpDouble(const void *i, const void *j) {
    if (*(double *)i < *(double *)j)
        return 1;
    else if (*(double *)i == *(double *)j)
        return 0;
    else 
        return -1;
}

void qsort3way(void *base, int lo, int hi, size_t size,
               int (*cmp)(const void *, const void *)) {
    if (hi <= lo)
        return;
    else {
        char *ptr = (char*)base;
        char *v = ptr + lo * size;

        int lt = lo, gt = hi;
        int i = lo;
        while (i <= gt) {
            int c = cmp(v, ptr + i * size);
            if (c < 0)
                swap(ptr + (lt++) * size, ptr + (i++) * size, size);
            else if (c > 0)
                swap(ptr + i * size, ptr + (gt--) * size, size);    
            else 
                i++;
        }

        qsort3way(base, lo, lt - 1, size, cmp);
        qsort3way(base, gt + 1, hi, size, cmp);
    }     
}

int main(void) {
    int i;
    double *d = (double*)malloc(sizeof(double) * 100);

    for (i = 0; i < 100; i++)
        d[i] = (double)rand();

    qsort3way(d, 0, 100 -1, sizeof(double), cmpDouble);

    for (i = 0; i < 100; i++)
        printf("%.10lf\n", d[i]);

    free(d);
    return 0;
}

sample output:

   41.0000000000
   153.0000000000
   288.0000000000
   2082.0000000000
   292.0000000000
   1869.0000000000
   491.0000000000
   778.0000000000
   1842.0000000000
   6334.0000000000
   2995.0000000000
   8723.0000000000
   3035.0000000000
   3548.0000000000
   4827.0000000000
   3902.0000000000
   4664.0000000000
   5436.0000000000
   4966.0000000000
   5537.0000000000
   5447.0000000000
   7376.0000000000
   5705.0000000000
   6729.0000000000
   6868.0000000000
   7711.0000000000
   9961.0000000000
   8942.0000000000
   9894.0000000000
   9040.0000000000
   9741.0000000000

Solution

  • Your implementation is incorrect because the pivot may move during the partitioning phase and you use a pointer for the comparison which no longer points to it. Implementations in other languages use the value of the pivot instead of its address.

    Note also these shortcomings:

    • recursing both ways may cause stack overflow on pathological distributions. In you case, an array that is already sorted is a pathological distribution.
    • the comparison function should return the opposite values: -1 if a < b, +1 is a > b and 0 if a == b.
    • the API is non-standard and confusing: you should pass the number of elements instead of a range with included bounds.

    Here is a corrected and commented version:

    #include <stdio.h>
    #include <stdlib.h>
    
    static void swap(unsigned char *x, unsigned char *y, size_t size) {
        /* sub-optimal, but better than malloc */
        while (size-- > 0) {
            unsigned char c = *x;
            *x++ = *y;
            *y++ = c;
        }
    }
    
    void qsort3way(void *base, int n, size_t size,
                   int (*cmp)(const void *, const void *))
    {
        unsigned char *ptr = (unsigned char *)base;
    
        while (n > 1) {
            /* use first element as pivot, pointed to by lt */
            int i = 1, lt = 0, gt = n;
            while (i < gt) {
                int c = cmp(ptr + lt * size, ptr + i * size);
                if (c > 0) {
                    /* move smaller element before the pivot range */
                    swap(ptr + lt * size, ptr + i * size, size);
                    lt++;
                    i++;
                } else if (c < 0) {
                    /* move larger element to the end */
                    gt--;
                    swap(ptr + i * size, ptr + gt * size, size);
                    /* test with that element again */
                } else {
                    /* leave identical element alone */
                    i++;
                }
            }
            /* array has 3 parts:
             * from 0 to lt excluded: elements smaller than pivot
             * from lt to gt excluded: elements identical to pivot
             * from gt to n excluded: elements greater than pivot
             */
            /* recurse on smaller part, loop on larger to minimize
               stack use for pathological distributions */
            if (lt < n - gt) {
                qsort3way(ptr, lt, size, cmp);
                ptr += gt * size;
                n -= gt;
            } else {
                qsort3way(ptr + gt * size, n - gt, size, cmp);
                n = lt;
            }
        }
    }    
    
    static int cmp_double(const void *i, const void *j) {
        /* this comparison function does not handle NaNs */
        if (*(const double *)i < *(const double *)j)
            return -1;
        if (*(const double *)i > *(const double *)j)
            return +1;
        else
            return 0;
    }
    
    int main(void) {
        double d[100];
        int i;
    
        for (i = 0; i < 100; i++)
            d[i] = rand() / ((double)RAND_MAX + 1);
    
        qsort3way(d, 100, sizeof(*d), cmp_double);
    
        for (i = 0; i < 100; i++)
            printf("%.10lf\n", d[i]);
    
        return 0;
    }