I implemented this version of quicksort by taking the median of 3 elements as the pivot:
def rearrange_array(array, begin_index, end_index):
# choose 3 random indexes in the array
index1 = randint(begin_index, int(begin_index + (end_index - begin_index)/3))
index2 = randint(int(begin_index + (end_index - begin_index)/3), int(begin_index + 2*(end_index - begin_index)/3))
index3 = randint(int(begin_index + 2*(end_index - begin_index)/3), end_index)
if array[index1] > array[index2]:
tmp = array[index1]
array[index1] = array[index2]
array[index2] = tmp
if array[index2] > array[index3]:
tmp = array[index2]
array[index2] = array[index3]
array[index3] = tmp
if array[index1] > array[index2]:
tmp = array[index1]
array[index1] = array[index2]
array[index2] = tmp
# swap index2 and begin_index
tmp = array[index2]
array[index2] = array[end_index]
array[end_index] = tmp
def partition(array, start_index, end_index): # partition = conquer
rearrange_array(array, start_index, end_index)
pivot = array[end_index] # pivot is always the right most index
i = start_index - 1
# if an element is samller than pivot swap it in front
for j in range(start_index, end_index):
if array[j] <= pivot:
i += 1
# swap
tmp = array[i]
array[i] = array[j]
array[j] = tmp
# swap the pivot with the element after i
# since i only goes forward once something smaller than the pivot is found, we know that
# everything in front of i+1 will be smaller than pivot
tmp = array[i+1]
array[i+1] = array[end_index]
array[end_index] = tmp
return i + 1
def quicksort_recursive(array, start_index, end_index):
if start_index < end_index:
pivot_index = partition(array, start_index, end_index)
quicksort_recursive(array, start_index, pivot_index - 1)
quicksort_recursive(array, pivot_index, end_index)
to test the sorting algorithm, I used it on an array of 100k elements which took ~16 seconds
def get_array(length, maximum):
array = []
for _ in range(length):
array.append(randint(0, maximum))
return array
sys.setrecursionlimit(sys.getrecursionlimit() * 100)
a = get_array(100000, 100)
t = time()
quicksort_recursive(a, 0, len(a) - 1)
print(time() - t)
Then I compared it to this version of quicksort, which only took about 0.8-1 second:
def merge(array, left, right):
# the program will only get here once we call mergesort on
# a one element array
i = j = k = 0
# put the smallest one in the array
while i < len(left) and j < len(right):
if left[i] < right[j]:
array[k] = left[i]
i += 1
else:
array[k] = right[j]
j += 1
k += 1
# if one array still has stuff left:
while i < len(left):
array[k] = left[i]
i += 1
k += 1
while j < len(right):
array[k] = right[j]
j += 1
k += 1
def mergesort_recursive(array):
# 1. devide the array in 2
if len(array) > 1:
# divide
middle_index = int(len(array)/2)
left = array[:middle_index]
right = array[middle_index:]
# conquer
mergesort_recursive(left)
mergesort_recursive(right)
# merge
merge(array, left, right)
I'm a bit confused about this, isn't quicksort supposed to be faster than mergesort? I've tried it on an array of 100 elements and it does put the elements in correct order so I'm not sure what's going on
Thanks!
Examples, didn't used median of 3. On my system, quick sort 0.312 seconds, bottom up merge sort 0.436 seconds, top down merge sort 0.500 seconds. Note for these type of programs, python is over 50 times slower than a compiled language like C or C++.
quick sort hoare partition scheme
import random
from time import time
def qsort(a, lo, hi):
if(lo >= hi):
return
p = a[(lo + hi) // 2] # pivot, any a[] except a[hi]
i = lo - 1
j = hi + 1
while(1):
while(1): # while(a[++i] < p)
i += 1
if(a[i] >= p):
break
while(1): # while(a[--j] < p)
j -= 1
if(a[j] <= p):
break
if(i >= j):
break
a[i],a[j] = a[j],a[i]
qsort(a, lo, j)
qsort(a, j+1, hi)
# test sort
a = [random.randint(0, 2147483647) for r in range(100*1024)]
s = time()
qsort(a, 0, len(a)-1)
e = time()
print e - s
# check to see if data was sorted
f = 0
for i in range (1 ,len(a)):
if(a[i-1] > a[i]):
f = 1
break
if(f == 0):
print("sorted")
else:
print("error")
bottom up merge sort
import random
from time import time
def sort(a):
if(len(a) < 2): # if nothing to do, return
return
b = [0] * len(a) # allocate b
mrgsrt(a, b, len(a))
def mrgsrt(a, b, n):
s = 1 # assume even pass count
if((passcnt(n) & 1) == 1): # if odd count
while(s < n): # swap pairs in place
if(a[s] < a[s-1]):
a[s-1],a[s] = a[s],a[s-1]
s = s + 2
s = 2
while(s < n):
ee = 0 # reset end index
while(ee < n): # setup for next pair of runs
ll = ee
rr = ll + s
if(rr >= n): # if only left run copy it
b[ll:n] = a[ll:n]
break
ee = rr + s
if(ee > n):
ee = n
mrg(a, b, ll, rr, ee)
a,b = b,a # swap(a, b)
s = s << 1 # double run size
def mrg(a, b, ll, rr, ee): # merge a pair of runs from a to b
o = ll # o = b[] index
l = ll # l = a[] left index
r = rr # r = a[] right index
while True:
if(a[l] <= a[r]): # if a[l] <= a[r]
b[o] = a[l] # copy a[l]
o += 1
l += 1
if(l < rr): # if not end of left run
continue # continue (back to while)
b[o:ee] = a[r:ee] # else copy rest of right run
return # and return
else: # else a[l] > a[r]
b[o] = a[r] # copy a[r]
o += 1
r += 1
if(r < ee): # if not end of right run
continue # continue (back to while)
b[o:ee] = a[l:rr] # else copy rest of left run
return # and return
def passcnt(n): # return # passes
i = 0
s = 1
while(s < n):
s = s << 1
i = i + 1
return(i)
# test sort
a = [random.randint(0, 2147483647) for r in range(100*1024)]
c = a[:] # c = sorted copy of a
c.sort()
s = time()
sort(a)
e = time()
print e - s
# check to see if data was sorted properly
f = 0
for i in range (0, len(a)):
if(a[i] != c[i]):
f = 1
break
if(f == 0):
print("sorted")
else:
print("error")
top down merge sort
import random
from time import time
def sort(a):
if(len(a) < 2): # if nothing to do, return
return
b = [0] * len(a) # allocate b
msa2a(a, b, 0, len(a)) # merge sort a to a
def msa2a(a, b, low, end): # merge sort a to a
if((end - low) < 2): # if < 2 elements
return # return
mid = (low+end)//2 # set mid point
msa2b(a, b, low, mid) # merge sort left half to b
msa2b(a, b, mid, end) # merge sort right half to b
mrg(b, a, low, mid, end) # merge halves from b to a
def msa2b(a, b, low, end): # merge sort a to b
if((end - low) < 2): # if < 2 elements
b[low] = a[low] # copy 1 element from a to b
return # return
mid = (low+end)//2 # set mid point
msa2a(a, b, low, mid) # merge sort left half to a
msa2a(a, b, mid, end) # merge sort right half to a
mrg(a, b, low, mid, end) # merge halves from a to b
def mrg(a, b, ll, rr, ee): # merge a pair of runs from a to b
o = ll # o = b[] index
l = ll # l = a[] left index
r = rr # r = a[] right index
while True:
if(a[l] <= a[r]): # if a[l] <= a[r]
b[o] = a[l] # copy a[l]
o += 1
l += 1
if(l < rr): # if not end of left run
continue # continue (back to while)
b[o:ee] = a[r:ee] # else copy rest of right run
return # and return
else: # else a[l] > a[r]
b[o] = a[r] # copy a[r]
o += 1
r += 1
if(r < ee): # if not end of right run
continue # continue (back to while)
b[o:ee] = a[l:rr] # else copy rest of left run
return # and return
# test sort
a = [random.randint(0, 2147483647) for r in range(100*1024)]
c = a[:] # c = sorted copy of a
c.sort()
s = time()
sort(a)
e = time()
print e - s
# check to see if data was sorted properly
f = 0
for i in range (0, len(a)):
if(a[i] != c[i]):
f = 1
break
if(f == 0):
print("sorted")
else:
print("error")