Search code examples
algorithmsortingtime-complexitymergesortinsertion-sort

How to explain the result of my experiment aimed to find the threshold between insertion sort and merge sort?


When I reading the book Introduction to Algorithms, I came across the question. You know insertion sort on small arrays is faster than merge sort. But the book asked how we could choose the threshold between the two sorting algorithms in practice. So I design the test code below trying to find it:

#include <iostream>
#include <stdlib.h>
#include <vector>
#include <time.h>

#define N 10000
#define MAXSIZE 10000
#define RUN_TIME_FOR_TEST 1000

using std::vector;

// The parameters, begin and end, are the indices.
void insertion_sort(vector<int> &array_num, int begin, int end)
{
    for (int j = begin + 1; j <= end; j++)
    {
        int key = array_num[j];
        int i = j - 1;
        while (i > begin - 1 && array_num[i] > key)
        {
            array_num[i + 1] = array_num[i];
            i = i - 1;
        }
        array_num[i + 1] = key;
    }
}

void merge(vector<int> &array_num, int begin, int median, int end)
{
    vector<int> array_left, array_right;
    for (int i = begin; i <= median; i++) {
        array_left.push_back(array_num[i]);
    }
    for (int i = median + 1; i <= end; i++) {
        array_right.push_back(array_num[i]);
    }
    
    int i, j;
    i = j = 0;
    for (int k = begin; k <= end; k++) {
        if (i < median - begin + 1 && (j >= end - median || array_left[i] <= array_right[j])) {
            array_num[k] = array_left[i];
            i ++;
        }
        else if (j < end - median && (i >= median - begin + 1 || array_left[i] > array_right[j]))
        {
            array_num[k] = array_right[j];
            j ++;
        }
    }
}

// The parameters, begin and end, are the indices.
void merge_insertion_sort(vector<int> &array_num, int begin, int end, int threshold_merge_insert)
{
    if (begin < end) {
        int median = (begin + end) / 2;
        if (median - begin + 1 <= threshold_merge_insert) {
            insertion_sort(array_num, begin, median);
        }
        else
            merge_insertion_sort(array_num, begin, median, threshold_merge_insert);
        if (end - median <= threshold_merge_insert) {
            insertion_sort(array_num, median + 1, end);
        }
        else
            merge_insertion_sort(array_num, median + 1, end, threshold_merge_insert);
        merge(array_num, begin, median, end);
    }
    return;
}

int main()
{
    vector<int> array_num;
    for (int i = 0; i < N; i++) {
        int rand_num = rand() % MAXSIZE + 1;
        array_num.push_back(rand_num);
    }
    
//    std::cout << "The numbers in the array are: ";
//    for (int i = 0; i < N; i++) {
//        std::cout << array_num[i] << ' ';
//    }
    
//    insertion_sort(array_num, 0, N - 1);
    
    std::cout << "Test begins:";
    clock_t begin, end;
    for (int i = 0; i < N; i += 100) {
        vector<int> array_num_copy(array_num);
        begin = clock();
        for (int j = 0; j < RUN_TIME_FOR_TEST; j++) {
            merge_insertion_sort(array_num_copy, 0, N - 1, i);
        }
        end = clock();
        std::cout << "\nWhen the threshold between merge and insertion sort is " << i << ", the runtime is " << int(end - begin) / RUN_TIME_FOR_TEST << ".";
    }
    
//    std::cout << "\nAfter Merge Sort, they are: ";
//    for (int i = 0; i < N; i++) {
//        std::cout << array_num[i] << ' ';
//    }
    
    getchar();
}

Then I got the output as below:

Test begins:
When the threshold between merge and insertion sort is 0, the runtime is 28639.
When the threshold between merge and insertion sort is 100, the runtime is 6509.
When the threshold between merge and insertion sort is 200, the runtime is 5286.
When the threshold between merge and insertion sort is 300, the runtime is 5322.
When the threshold between merge and insertion sort is 400, the runtime is 4264.
When the threshold between merge and insertion sort is 500, the runtime is 4263.
When the threshold between merge and insertion sort is 600, the runtime is 4267.
When the threshold between merge and insertion sort is 700, the runtime is 3365.
When the threshold between merge and insertion sort is 800, the runtime is 3366.
When the threshold between merge and insertion sort is 900, the runtime is 3367.
When the threshold between merge and insertion sort is 1000, the runtime is 3359.
When the threshold between merge and insertion sort is 1100, the runtime is 3368.
When the threshold between merge and insertion sort is 1200, the runtime is 3377.
When the threshold between merge and insertion sort is 1300, the runtime is 2527.
When the threshold between merge and insertion sort is 1400, the runtime is 2563.
When the threshold between merge and insertion sort is 1500, the runtime is 2528.
When the threshold between merge and insertion sort is 1600, the runtime is 2531.
When the threshold between merge and insertion sort is 1700, the runtime is 2583.
When the threshold between merge and insertion sort is 1800, the runtime is 2618.
When the threshold between merge and insertion sort is 1900, the runtime is 2543.
When the threshold between merge and insertion sort is 2000, the runtime is 2532.
When the threshold between merge and insertion sort is 2100, the runtime is 2539.
When the threshold between merge and insertion sort is 2200, the runtime is 2544.
When the threshold between merge and insertion sort is 2300, the runtime is 2533.
When the threshold between merge and insertion sort is 2400, the runtime is 2532.
When the threshold between merge and insertion sort is 2500, the runtime is 1738.
When the threshold between merge and insertion sort is 2600, the runtime is 1743.
When the threshold between merge and insertion sort is 2700, the runtime is 1842.
When the threshold between merge and insertion sort is 2800, the runtime is 1839.
When the threshold between merge and insertion sort is 2900, the runtime is 1792.
When the threshold between merge and insertion sort is 3000, the runtime is 1741.
When the threshold between merge and insertion sort is 3100, the runtime is 1740.
When the threshold between merge and insertion sort is 3200, the runtime is 1764.
When the threshold between merge and insertion sort is 3300, the runtime is 1762.
When the threshold between merge and insertion sort is 3400, the runtime is 1737.
When the threshold between merge and insertion sort is 3500, the runtime is 1743.
When the threshold between merge and insertion sort is 3600, the runtime is 1742.
When the threshold between merge and insertion sort is 3700, the runtime is 1741.
When the threshold between merge and insertion sort is 3800, the runtime is 1745.
When the threshold between merge and insertion sort is 3900, the runtime is 1755.
When the threshold between merge and insertion sort is 4000, the runtime is 1748.
When the threshold between merge and insertion sort is 4100, the runtime is 1742.
When the threshold between merge and insertion sort is 4200, the runtime is 1741.
When the threshold between merge and insertion sort is 4300, the runtime is 1746.
When the threshold between merge and insertion sort is 4400, the runtime is 1746.
When the threshold between merge and insertion sort is 4500, the runtime is 1742.
When the threshold between merge and insertion sort is 4600, the runtime is 1751.
When the threshold between merge and insertion sort is 4700, the runtime is 1741.
When the threshold between merge and insertion sort is 4800, the runtime is 1738.
When the threshold between merge and insertion sort is 4900, the runtime is 1746.
When the threshold between merge and insertion sort is 5000, the runtime is 1006.
When the threshold between merge and insertion sort is 5100, the runtime is 1019.
When the threshold between merge and insertion sort is 5200, the runtime is 997.
When the threshold between merge and insertion sort is 5300, the runtime is 1002.
When the threshold between merge and insertion sort is 5400, the runtime is 1000.
When the threshold between merge and insertion sort is 5500, the runtime is 1001.
When the threshold between merge and insertion sort is 5600, the runtime is 1000.
When the threshold between merge and insertion sort is 5700, the runtime is 998.
When the threshold between merge and insertion sort is 5800, the runtime is 1002.
When the threshold between merge and insertion sort is 5900, the runtime is 1003.
When the threshold between merge and insertion sort is 6000, the runtime is 1001.
When the threshold between merge and insertion sort is 6100, the runtime is 998.
When the threshold between merge and insertion sort is 6200, the runtime is 997.
When the threshold between merge and insertion sort is 6300, the runtime is 1001.
When the threshold between merge and insertion sort is 6400, the runtime is 1003.
When the threshold between merge and insertion sort is 6500, the runtime is 997.
When the threshold between merge and insertion sort is 6600, the runtime is 998.
When the threshold between merge and insertion sort is 6700, the runtime is 997.
When the threshold between merge and insertion sort is 6800, the runtime is 1003.
When the threshold between merge and insertion sort is 6900, the runtime is 1000.
When the threshold between merge and insertion sort is 7000, the runtime is 998.
When the threshold between merge and insertion sort is 7100, the runtime is 998.
When the threshold between merge and insertion sort is 7200, the runtime is 997.
When the threshold between merge and insertion sort is 7300, the runtime is 1003.
When the threshold between merge and insertion sort is 7400, the runtime is 1000.
When the threshold between merge and insertion sort is 7500, the runtime is 998.
When the threshold between merge and insertion sort is 7600, the runtime is 994.
When the threshold between merge and insertion sort is 7700, the runtime is 996.
When the threshold between merge and insertion sort is 7800, the runtime is 1003.
When the threshold between merge and insertion sort is 7900, the runtime is 1000.
When the threshold between merge and insertion sort is 8000, the runtime is 1002.
When the threshold between merge and insertion sort is 8100, the runtime is 998.
When the threshold between merge and insertion sort is 8200, the runtime is 998.
When the threshold between merge and insertion sort is 8300, the runtime is 1002.
When the threshold between merge and insertion sort is 8400, the runtime is 1000.
When the threshold between merge and insertion sort is 8500, the runtime is 999.
When the threshold between merge and insertion sort is 8600, the runtime is 999.
When the threshold between merge and insertion sort is 8700, the runtime is 998.
When the threshold between merge and insertion sort is 8800, the runtime is 1004.
When the threshold between merge and insertion sort is 8900, the runtime is 999.
When the threshold between merge and insertion sort is 9000, the runtime is 1002.
When the threshold between merge and insertion sort is 9100, the runtime is 1005.
When the threshold between merge and insertion sort is 9200, the runtime is 1001.
When the threshold between merge and insertion sort is 9300, the runtime is 1004.
When the threshold between merge and insertion sort is 9400, the runtime is 1003.
When the threshold between merge and insertion sort is 9500, the runtime is 1002.
When the threshold between merge and insertion sort is 9600, the runtime is 1000.
When the threshold between merge and insertion sort is 9700, the runtime is 997.
When the threshold between merge and insertion sort is 9800, the runtime is 1002.
When the threshold between merge and insertion sort is 9900, the runtime is 999.
Program ended with exit code: 0

You can see from the result that when we choose to use insertion sort on more longer arrays, the runtime seems to become smaller. I guess that the threshold may be larger, but the number of the elements in the array is already 10,000. May someone help me to explain the result?


Solution

  • Your benchmark has a major problem:

    • array_num_copy is initialized outside the inner loop: so it is unsorted for the first iteration, but already sorted for the remainder of this inner loop, hence 99.9% of the time. repeating merge_insertion_sort on a sorted array introduces a strong bias: insertion_sort is optimal on a sorted slice, which explains why you get better timings with large threshold values. You should instead always copy the original array before calling merge_insertion_sort.

    Also note these areas of improvement:

    • the tests in merge_insertion_sort are sub-optimal: you should test if the slice length is below the threshold and invoke insertion_sort on the whole slice.

    • the algorithm in merge can be improved: you should save only the left half of the slice and allocate the vector array_left to its final size directly and use array_left[i - begin] = array_num[i] instead of the more costly array_left.push_back(array_num[i]).

    • you only test a single array size and a single pseudo-random distribution: you would get a different result for different distributions to finding a good threshold requires testing different distributions and making guesses at their likelihood in real cases. The effect of caching may also bias the results.

    • values you test for the threshold are too spread apart: you should test more small values and fewer large ones.

    Here is a modified version using end as the index of the first excluded element and various improvements on the algorithms:

    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>
    #include <vector>
    
    using std::vector;
    
    // begin and end are the indices: begin is included, end is excluded
    void insertion_sort(vector<int> &array_num, int begin, int end) {
        for (int j = begin + 1; j < end; j++) {
            int key = array_num[j];
            int i = j - 1;
            while (i >= begin && array_num[i] > key) {
                array_num[i + 1] = array_num[i];
                i = i - 1;
            }
            array_num[i + 1] = key;
        }
    }
    
    // begin, median and end are the indices:
    // begin is included, median is the start of the right half, end is excluded
    void merge(vector<int> &array_num, int begin, int median, int end) {
        int n1 = median - begin;
        vector<int> array_left(n1);
    
        for (int i = 0; i < n1; i++) {
            array_left[i] = array_num[begin + i];
        }
    
        for (int i = 0, j = median, k = begin; i < n1;) {
            if (j >= end || array_left[i] <= array_num[j]) {
                array_num[k++] = array_left[i++];
            } else {
                array_num[k++] = array_num[j++];
            }
        }
    }
    
    // begin and end are the indices: begin is included, end is excluded
    void merge_insertion_sort(vector<int> &array_num, int begin, int end, int threshold_merge_insert)
    {
        if (end - begin <= threshold_merge_insert) {
            insertion_sort(array_num, begin, end);
        } else {
            int median = begin + (end - begin) / 2;
            merge_insertion_sort(array_num, begin, median, threshold_merge_insert);
            merge_insertion_sort(array_num, median, end, threshold_merge_insert);
            merge(array_num, begin, median, end);
        }
    }
    
    #define N 10000
    #define MAXVALUE 10000
    #define MIN_THRESHOLD 1
    #define MAX_THRESHOLD 10000
    #define REPEAT_COUNT 100
    
    int main() {
        vector<int> array_num;
        vector<int> array_num_rand(N);
    
        for (int i = 0; i < N; i++) {
            array_num_rand[i] = rand() % MAXVALUE + 1;
        }
    
        //    std::cout << "The numbers in the array are: ";
        //    for (int i = 0; i < N; i++) {
        //        std::cout << array_num[i] << ' ';
        //    }
    
        for (int i = MIN_THRESHOLD; i < MAX_THRESHOLD; i += (i + 9) / 10) {
            int sorted = 1;
            clock_t begin = clock();
            for (int j = 0; j < REPEAT_COUNT; j++) {
                array_num = array_num_rand;
                merge_insertion_sort(array_num, 0, N, i);
            }
            clock_t end = clock();
            for (int j = 1; j < N; j++) {
                if (array_num[j - 1] > array_num[j]) {
                    printf("merge_insertion_sort(array_num, 0, %d, %d) -> sort error at %d\n",
                           N, i, j);
                    sorted = 0;
                    break;
                }
            }
            if (sorted) {
                printf("merge_insertion_sort(array_num, 0, %d, %d) -> %.0fus\n",
                       N, i, (end - begin) * 1000000. / CLOCKS_PER_SEC / REPEAT_COUNT);
            }
        }
    
        printf("Press enter to finish\n");
        getchar();
        return 0;
    }
    

    Sample run:

    merge_insertion_sort(array_num, 0, 10000, 1) -> 1510us
    merge_insertion_sort(array_num, 0, 10000, 2) -> 1217us
    merge_insertion_sort(array_num, 0, 10000, 3) -> 1062us
    merge_insertion_sort(array_num, 0, 10000, 4) -> 1054us
    merge_insertion_sort(array_num, 0, 10000, 5) -> 927us
    merge_insertion_sort(array_num, 0, 10000, 6) -> 899us
    merge_insertion_sort(array_num, 0, 10000, 7) -> 876us
    merge_insertion_sort(array_num, 0, 10000, 8) -> 896us
    merge_insertion_sort(array_num, 0, 10000, 9) -> 828us
    merge_insertion_sort(array_num, 0, 10000, 10) -> 706us
    merge_insertion_sort(array_num, 0, 10000, 11) -> 680us
    merge_insertion_sort(array_num, 0, 10000, 13) -> 687us
    merge_insertion_sort(array_num, 0, 10000, 15) -> 709us
    merge_insertion_sort(array_num, 0, 10000, 17) -> 685us
    merge_insertion_sort(array_num, 0, 10000, 19) -> 680us
    merge_insertion_sort(array_num, 0, 10000, 21) -> 627us
    merge_insertion_sort(array_num, 0, 10000, 24) -> 612us
    merge_insertion_sort(array_num, 0, 10000, 27) -> 653us
    merge_insertion_sort(array_num, 0, 10000, 30) -> 591us
    merge_insertion_sort(array_num, 0, 10000, 33) -> 641us
    merge_insertion_sort(array_num, 0, 10000, 37) -> 622us
    merge_insertion_sort(array_num, 0, 10000, 41) -> 531us
    merge_insertion_sort(array_num, 0, 10000, 46) -> 562us
    merge_insertion_sort(array_num, 0, 10000, 51) -> 538us
    merge_insertion_sort(array_num, 0, 10000, 57) -> 572us
    merge_insertion_sort(array_num, 0, 10000, 63) -> 560us
    merge_insertion_sort(array_num, 0, 10000, 70) -> 542us
    merge_insertion_sort(array_num, 0, 10000, 77) -> 566us
    merge_insertion_sort(array_num, 0, 10000, 85) -> 526us
    merge_insertion_sort(array_num, 0, 10000, 94) -> 532us
    merge_insertion_sort(array_num, 0, 10000, 104) -> 533us
    merge_insertion_sort(array_num, 0, 10000, 115) -> 561us
    merge_insertion_sort(array_num, 0, 10000, 127) -> 536us
    merge_insertion_sort(array_num, 0, 10000, 140) -> 538us
    merge_insertion_sort(array_num, 0, 10000, 154) -> 543us
    merge_insertion_sort(array_num, 0, 10000, 170) -> 576us
    merge_insertion_sort(array_num, 0, 10000, 187) -> 570us
    merge_insertion_sort(array_num, 0, 10000, 206) -> 571us
    merge_insertion_sort(array_num, 0, 10000, 227) -> 630us
    merge_insertion_sort(array_num, 0, 10000, 250) -> 574us
    merge_insertion_sort(array_num, 0, 10000, 275) -> 611us
    merge_insertion_sort(array_num, 0, 10000, 303) -> 596us
    merge_insertion_sort(array_num, 0, 10000, 334) -> 680us
    merge_insertion_sort(array_num, 0, 10000, 368) -> 703us
    merge_insertion_sort(array_num, 0, 10000, 405) -> 708us
    merge_insertion_sort(array_num, 0, 10000, 446) -> 675us
    merge_insertion_sort(array_num, 0, 10000, 491) -> 709us
    merge_insertion_sort(array_num, 0, 10000, 541) -> 690us
    merge_insertion_sort(array_num, 0, 10000, 596) -> 675us
    merge_insertion_sort(array_num, 0, 10000, 656) -> 1017us
    merge_insertion_sort(array_num, 0, 10000, 722) -> 997us
    merge_insertion_sort(array_num, 0, 10000, 795) -> 1000us
    merge_insertion_sort(array_num, 0, 10000, 875) -> 1023us
    merge_insertion_sort(array_num, 0, 10000, 963) -> 1002us
    merge_insertion_sort(array_num, 0, 10000, 1060) -> 975us
    merge_insertion_sort(array_num, 0, 10000, 1166) -> 990us
    merge_insertion_sort(array_num, 0, 10000, 1283) -> 1594us
    merge_insertion_sort(array_num, 0, 10000, 1412) -> 1578us
    merge_insertion_sort(array_num, 0, 10000, 1554) -> 1561us
    merge_insertion_sort(array_num, 0, 10000, 1710) -> 1636us
    merge_insertion_sort(array_num, 0, 10000, 1881) -> 1712us
    merge_insertion_sort(array_num, 0, 10000, 2070) -> 1583us
    merge_insertion_sort(array_num, 0, 10000, 2277) -> 1558us
    merge_insertion_sort(array_num, 0, 10000, 2505) -> 2896us
    merge_insertion_sort(array_num, 0, 10000, 2756) -> 2867us
    merge_insertion_sort(array_num, 0, 10000, 3032) -> 2868us
    merge_insertion_sort(array_num, 0, 10000, 3336) -> 2825us
    merge_insertion_sort(array_num, 0, 10000, 3670) -> 2915us
    merge_insertion_sort(array_num, 0, 10000, 4037) -> 2864us
    merge_insertion_sort(array_num, 0, 10000, 4441) -> 2928us
    merge_insertion_sort(array_num, 0, 10000, 4886) -> 2968us
    merge_insertion_sort(array_num, 0, 10000, 5375) -> 5561us
    merge_insertion_sort(array_num, 0, 10000, 5913) -> 5688us
    merge_insertion_sort(array_num, 0, 10000, 6505) -> 5615us
    merge_insertion_sort(array_num, 0, 10000, 7156) -> 5576us
    merge_insertion_sort(array_num, 0, 10000, 7872) -> 5542us
    merge_insertion_sort(array_num, 0, 10000, 8660) -> 5543us
    merge_insertion_sort(array_num, 0, 10000, 9526) -> 5554us
    Press enter to finish
    

    There is no obvious optimal threshold, but values between 30 and 200 produce better timings. Using a larger threshold will further improve timings for arrays that are partially or fully sorted.