Search code examples
csortingmergesortexecution-time

My InsertionSort Program runs faster than MergeSort Program even with very large input. Why?


I am a beginner, I wanted to find out the time taken by my code to execute and find out if Insertion Sort(IS) takes more time than MergeSort(MS) (It is supposed to take more time for sufficiently large input).

So I wrote a Recursive IS algo:

void Insertion_Sort(int *A, int n) {
    if (n < SIZE - 1) {
        int j = n;
        int key = A[j + 1];
        while (j >= 0 && A[j] > key) {
            A[j + 1] = A[j];
            j--;
        }
        A[j + 1] = key;
        n++;
        Insertion_Sort(A, n);
    }
}

int main() { 
    struct timeval tv1, tv2;
    int array[SIZE];

    printf("Enter the elements to be sorted\n");
    freopen("input1.txt", "r", stdin);
    freopen("output1.txt", "w", stdout);
    for (int i = 0; i < SIZE; i++) {
        scanf("%d", array + i);
    }
    gettimeofday(&tv1, NULL);
    for (int i = 0; i < 1000; i++) {
        Insertion_Sort(array, 0);
    }
    gettimeofday(&tv2, NULL);
    printf("The sorted array is :\n");
    for (int i = 0; i < SIZE; i++) {
        printf("%d\n", array[i]);
    }
    printf("The microsecond 1 is %f\n", (double)(tv2.tv_usec));
    printf("The microsecond 2 is %f\n", (double)(tv1.tv_usec));
    printf("Total time = %f seconds\n",
           (double)(tv2.tv_usec - tv1.tv_usec) / 1000000 +
           (double)(tv2.tv_sec - tv1.tv_sec));
}

This was the code. It gave a run time of 0.015s

I also wrote a code for MergeSort:

void Merge(int *Array, int p, int q, int r) {
    int n1 = q - p + 1;
    int n2 = r - q;
    int Arr_1[n1 + 1];
    int Arr_2[n2 + 1];

    for (int i = 0; i < n1; i++) {
        Arr_1[i] = Array[p + i];
    }
    for (int i = 0; i < n2; i++) {
        Arr_2[i] = Array[q + i + 1];
    }
    Arr_1[n1] = __INT_MAX__;
    Arr_2[n2] = __INT_MAX__;
    int i = 0;
    int j = 0;
    for (int k = p ; k <= r; k++) {
        if (Arr_1[i] < Arr_2[j]) {
            Array[k] = Arr_1[i];
            i++;
        } else {
            Array[k] = Arr_2[j];
            j++;
        } 
    }
}

void Merge_Sort(int *Array, int p, int r) {
    if (p < r) { 
        int mid;
        mid = (p + r) / 2;
        Merge_Sort(Array, p, mid);
        Merge_Sort(Array, mid + 1, r);
        Merge(Array, p, mid, r);
    }
}

//driver code
int main() {
    struct timeval tv1, tv2;
    int Array[SIZE];

    printf("Enter the array for Merging Operation:\n");
    freopen("input1.txt", "r", stdin);
    freopen("output3.txt", "w", stdout);
    for (int i = 0; i < SIZE; i++) {
        scanf("%d", &Array[i]);
    }
    gettimeofday(&tv1, NULL);

    for (int i = 0; i < 1000; i++) {
        Merge_Sort(Array, 0, SIZE - 1);
    }
    gettimeofday(&tv2, NULL);
    printf("The sorted array :\n");
    for (int i = 0; i < SIZE; i++) {
        printf("%d\n", Array[i]);
    }
    printf("The microsecond is %f\n", (double)(tv2.tv_usec));
    printf("The microsecond is %f\n", (double)(tv1.tv_usec));
    printf("\n\nTotal time = %f seconds\n",
           (double)(tv2.tv_usec - tv1.tv_usec) / 1000000 +
           (double)(tv2.tv_sec - tv1.tv_sec));
}

This one gave a run time of 0.06s I want to understand why MS is slower than IS even though its supposed to be faster.

I don't really understand how the timing code bit works but it was showing 0 no matter how large of an input I gave it. So, to make it show some result I looped it a 1000 times. I think a possible reason is that the array is sorted already in first invocation in the loop and since IS is linear in best case while MS is n lg(n) in all cases it runs faster. Is that possible? I would be really helpful if someone could show me how to time my code. Thanks in advance


Solution

  • The problem in your test code is you repeatedly sort the same array 1000 times: 999 of these calls involve a sorted array. InsertionSort is very efficient on sorted arrays with a time complexity of O(n), whereas MergeSort has a time complexity of O(n.log(n)) in all cases.

    For a more reliable test, you should make a copy of the unsorted array.

    Here is a modified benchmark:

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/time.h>
    
    #define SIZE    1024
    #define REPEAT  1000
    
    void Insertion_Sort(int *A, int n, int last) {
        if (n < last) {
            int j = n;
            int key = A[j + 1];
            while (j >= 0 && A[j] > key) {
                A[j + 1] = A[j];
                j--;
            }
            A[j + 1] = key;
            n++;
            Insertion_Sort(A, n, last);
        }
    }
    
    void Merge(int *Array, int p, int q, int r) {
        int n1 = q - p + 1;
        int n2 = r - q;
        int Arr_1[n1 + 1];
        int Arr_2[n2 + 1];
    
        for (int i = 0; i < n1; i++) {
            Arr_1[i] = Array[p + i];
        }
        for (int i = 0; i < n2; i++) {
            Arr_2[i] = Array[q + i + 1];
        }
        Arr_1[n1] = __INT_MAX__;
        Arr_2[n2] = __INT_MAX__;
        int i = 0;
        int j = 0;
        for (int k = p ; k <= r; k++) {
            if (Arr_1[i] < Arr_2[j]) {
                Array[k] = Arr_1[i];
                i++;
            } else {
                Array[k] = Arr_2[j];
                j++;
            }
        }
    }
    
    void Merge_Sort(int *Array, int p, int r) {
        if (p < r) {
            int mid;
            mid = (p + r) / 2;
            Merge_Sort(Array, p, mid);
            Merge_Sort(Array, mid + 1, r);
            Merge(Array, p, mid, r);
        }
    }
    
    //driver code
    int main() {
        struct timeval tv1, tv2;
        double IS_time, MS_time;
        FILE *fp;
        int SampleArray[SIZE];
        int Array1[SIZE];
        int Array3[SIZE];
    
        fp = fopen("input1.txt", "r");
        if (fp == NULL) {
            fprintf(stderr, "cannot open input1.txt: %s\n", strerror(errno));
            return 1;
        }
        for (int i = 0; i < SIZE; i++) {
            if (fscanf(fp, "%d", &SampleArray[i]) != 1)
                SampleArray[i] = i * i * i % SIZE;
        }
        fclose(fp);
        gettimeofday(&tv1, NULL);
        for (int i = 0; i < REPEAT; i++) {
            memcpy(Array1, SampleArray, sizeof Array1);
            Insertion_Sort(Array1, 0, SIZE - 1);
        }
        gettimeofday(&tv2, NULL);
        IS_time = ((double)(tv2.tv_usec - tv1.tv_usec) / 1000000 +
                   (double)(tv2.tv_sec - tv1.tv_sec));
        for (int i = 1; i < SIZE; i++) {
            if (Array1[i - 1] > Array1[i]) {
                printf("InsertionSort error at %d: %d <-> %d\n",
                       i, Array1[i - 1], Array1[i]);
                break;
            }
        }
    
        gettimeofday(&tv1, NULL);
        for (int i = 0; i < REPEAT; i++) {
            memcpy(Array3, SampleArray, sizeof Array3);
            Merge_Sort(Array3, 0, SIZE - 1);
        }
        gettimeofday(&tv2, NULL);
        MS_time = ((double)(tv2.tv_usec - tv1.tv_usec) / 1000000 +
                   (double)(tv2.tv_sec - tv1.tv_sec));
        for (int i = 1; i < SIZE; i++) {
            if (Array3[i - 1] > Array3[i]) {
                printf("MergeSort error at %d: %d <-> %d\n",
                       i, Array3[i - 1], Array3[i]);
                break;
            }
        }
    
        fp = fopen("output1.txt", "w");
        if (fp == NULL) {
            fprintf(stderr, "cannot open output1.txt: %s\n", strerror(errno));
            return 1;
        }
        for (int i = 0; i < SIZE; i++) {
            fprintf(fp, "%d\n", Array1[i]);
        }
        fclose(fp);
    
        fp = fopen("output3.txt", "w");
        if (fp == NULL) {
            fprintf(stderr, "cannot open output3.txt: %s\n", strerror(errno));
            return 1;
        }
        for (int i = 0; i < SIZE; i++) {
            fprintf(fp, "%d\n", Array3[i]);
        }
        fclose(fp);
    
        printf("InsertionSort total time = %f seconds\n", IS_time);
        printf("MergeSort total time =     %f seconds\n", MS_time);
        return 0;
    }
    

    Output for SIZE=1024:

    InsertionSort total time = 0.110109 seconds
    MergeSort total time =     0.054474 seconds
    

    Output for SIZE=4096:

    InsertionSort total time = 2.057987 seconds
    MergeSort total time =     0.276475 seconds
    

    Output for SIZE=16384:

    InsertionSort total time = 28.658141 seconds
    MergeSort total time =     1.297120 seconds
    

    These timings are consistent with the expected complexity: O(n2) for Insertion Sort and O(n.log(n)) for Merge Sort.