Search code examples
c++algorithmtime-complexitymergesortdivide-and-conquer

My merge sort algorithm is taking too long for large file input?


well it's not exactly a merge sort, the algorithm counts the number on inversions in an array using merge sort (basically I just added one simple line) it takes 2.415 seconds to read and merge sort 100,000 distinct integers from a text file while others who solved the same problem (on coursera.com) said it took them less than 0.5 seconds

here's my code, what went wrong? file reading maybe? thanks

#include <bits/stdc++.h>

using namespace std;
int a,b,i,j,n,x,k;
int t[100000]={0};
long long int s=0;

void merge(int a, int mid, int b)
     {
         i=a;
        j=mid+1;
        k=a;
        int v[100000]={0};
        while(i<=mid && j<= b)
        {
            if (t[i]<t[j])
                {v[k]=t[i];
                 i++;
                }
            else
            {v[k]=t[j];
             j++;
              s+=mid-i+1; //this line here counts the inversions
             }
           k++;
        }
       while(i<=mid)
        {v[k]=t[i];
         i++; k++;}

        while(j<=b)
        {v[k]=t[j];
         j++; k++;}

      for (i=a;i<k;i++)
         t[i]=v[i];
    }


void mergeSort(int a, int b)
    {
        if(a<b)
        {
           int mid=(a+b)/2;
            mergeSort(a,mid);
            mergeSort(mid+1,b);
            merge(a,mid,b);
        }
    }


int main(){
  ifstream fin("C:\\Users\\ASUS\\Desktop\\coursera.txt");
    n=100000;
    for(i=0;i<n;i++)
        fin>>t[i];

    mergeSort(0,n-1);

     cout<<endl<<s;

}

Solution

  • One issue I could see is that in merge function, you allocate too much space, and the copy back also take quite O(N), which make the total copy time O(N * N) instead of O(N*Log(N)). Simple change to merge function could be like:

    void merge(int a, int mid, int b)
    {
        i = a;
        j = mid + 1;
        k = 0;
        int* v = new int[b - a + 1];
        while (i <= mid && j <= b)
        {
            if (t[i]<t[j])
            {
                v[k] = t[i];
                i++;
            }
            else
            {
                v[k] = t[j];
                j++;
                s += mid - i + 1; //this line here counts the inversions
            }
            k++;
        }
        while (i <= mid)
        {
            v[k] = t[i];
            i++; k++;
        }
    
        while (j <= b)
        {
            v[k] = t[j];
            j++; k++;
        }
    
        for (i = 0; i<k; i++)
            t[a+i] = v[i];
    
        delete[] v;
        v = NULL;
    }