Search code examples
pythonsortingquicksortnp.argsort

How to convert this quicksort Python implementation into an equivalent of Numpy's argsort?


I have a Python iterative quicksort implementation. I'd like to do an argsort instead of a sort though, so that the resulting array has the ranking of the items when sorted instead of the items themselves.

import numpy as np

def argSortPartition(arr, arg, l, h):
    i = (l - 1)
    x = arr[h]

    for j in prange(l, h):
        if arr[j] <= x:
            # increment index of smaller element
            i = i + 1
            arr[i], arr[j] = arr[j], arr[i]
            arg[j] = arg[j] + 1
            arg[i] = arg[i] - 1

    arr[i + 1], arr[h] = arr[h], arr[i + 1]
    arg[i] = arg[i] + 1
    arg[j + 1] = arg[j + 1] + 1
    return (i + 1)


def quickArgSortIterative(arr, start_index, end_index):
    # Create an auxiliary stack
    size = end_index - start_index + 1
    stack = [0] * (size)
    arg = list(range(size))

    # initialize top of stack
    top = -1

    # push initial values of l and h to stack
    top = top + 1
    stack[top] = start_index
    top = top + 1
    stack[top] = end_index

    # Keep popping from stack while is not empty
    while top >= 0:
        # Pop h and l
        end_index = stack[top]
        top = top - 1
        start_index = stack[top]
        top = top - 1

        # Set pivot element at its correct position in
        # sorted array
        p = argSortPartition(arr, arg, start_index, end_index)

        # If there are elements on left side of pivot,
        # then push left side to stack
        if p - 1 > start_index:
            top = top + 1
            stack[top] = start_index
            top = top + 1
            stack[top] = p - 1

        # If there are elements on right side of pivot,
        # then push right side to stack
        if p + 1 < end_index:
            top = top + 1
            stack[top] = p + 1
            top = top + 1
            stack[top] = end_index

    return arg

d = np.array([50, 30, 20, 40, 60])
argsorted = quickArgSortIterative(d, 0, len(d) - 1)
print(argsorted)

>>> Should print [3, 1, 0, 2, 4], but I get [0, 3, 3, 5, 5] on this latest version.

Here's my attempt (the sorting works correctly, but arg (which should hold the output of argsort) does not. I need to sort just once to be efficient, so its an indirect sort operation.

I've tried quite a few things but seem to be going in circles at this point. I know its possible but I can't seem to find any reference implementations even in other languages.

This will eventually run in Numba CUDA compiled code, so I can't just use Numpy's argsort, and I can't use Python's fancy language constructs like list comprehensions. Has to be the caveman way :)

Any help is greatly appreciated!


Solution

  • I fixed it. Here is a working, highly performant arg sort implementation that doesn't use recursion and can be compiled to Numba CUDA:

    import numpy as np
    
    def argSortPartition(arr, arg, l, h):
        i = l - 1
        x = arr[h]
    
        for j in prange(l, h):
            if arr[j] <= x:
                # increment index of smaller element
                i = i + 1
                arr[i], arr[j] = arr[j], arr[i]
                arg[i], arg[j] = arg[j], arg[i]
    
        i += 1
        arr[i], arr[h] = arr[h], arr[i]
        arg[i], arg[h] = arg[h], arg[i]
        return i
    
    
    def quickArgSortIterative(arr, start_index, end_index):
        # Create an auxiliary stack
        size = end_index - start_index + 1
        stack = [0] * size
        arg = list(range(size))
    
        # initialize top of stack
        top = -1
    
        # push initial values of l and h to stack
        top = top + 1
        stack[top] = start_index
        top = top + 1
        stack[top] = end_index
    
        # Keep popping from stack while is not empty
        while top >= 0:
            # Pop h and l
            end_index = stack[top]
            top = top - 1
            start_index = stack[top]
            top = top - 1
    
            # Set pivot element at its correct position in
            # sorted array
            p = argSortPartition(arr, arg, start_index, end_index)
    
            # If there are elements on left side of pivot,
            # then push left side to stack
            if p - 1 > start_index:
                top = top + 1
                stack[top] = start_index
                top = top + 1
                stack[top] = p - 1
    
            # If there are elements on right side of pivot,
            # then push right side to stack
            if p + 1 < end_index:
                top = top + 1
                stack[top] = p + 1
                top = top + 1
                stack[top] = end_index
    
        # must uncomment this loop if returning rankings instead of argsort
        # for i in range(len(arg)):
        #    stack[arg[i]] = i
        
        # to return rankings instead, return stack
        return arg
    
    d = np.array([50, 30, 20, 40, 60])
    argsorted = quickArgSortIterative(d, 0, len(d) - 1)
    print(argsorted)
    

    This was a bit complicated - hope it helps someone.