How to convert this quicksort Python implementation into an equivalent of Numpy's argsort?

I have a Python iterative quicksort implementation. I'd like to do an argsort instead of a sort though, so that the resulting array has the ranking of the items when sorted instead of the items themselves.

import numpy as np

def argSortPartition(arr, arg, l, h):
    i = (l - 1)
    x = arr[h]

    for j in prange(l, h):
        if arr[j] <= x:
            # increment index of smaller element
            i = i + 1
            arr[i], arr[j] = arr[j], arr[i]
            arg[j] = arg[j] + 1
            arg[i] = arg[i] - 1

    arr[i + 1], arr[h] = arr[h], arr[i + 1]
    arg[i] = arg[i] + 1
    arg[j + 1] = arg[j + 1] + 1
    return (i + 1)


def quickArgSortIterative(arr, start_index, end_index):
    # Create an auxiliary stack
    size = end_index - start_index + 1
    stack = [0] * (size)
    arg = list(range(size))

    # initialize top of stack
    top = -1

    # push initial values of l and h to stack
    top = top + 1
    stack[top] = start_index
    top = top + 1
    stack[top] = end_index

    # Keep popping from stack while is not empty
    while top >= 0:
        # Pop h and l
        end_index = stack[top]
        top = top - 1
        start_index = stack[top]
        top = top - 1

        # Set pivot element at its correct position in
        # sorted array
        p = argSortPartition(arr, arg, start_index, end_index)

        # If there are elements on left side of pivot,
        # then push left side to stack
        if p - 1 > start_index:
            top = top + 1
            stack[top] = start_index
            top = top + 1
            stack[top] = p - 1

        # If there are elements on right side of pivot,
        # then push right side to stack
        if p + 1 < end_index:
            top = top + 1
            stack[top] = p + 1
            top = top + 1
            stack[top] = end_index

    return arg

d = np.array([50, 30, 20, 40, 60])
argsorted = quickArgSortIterative(d, 0, len(d) - 1)
print(argsorted)

>>> Should print [3, 1, 0, 2, 4], but I get [0, 3, 3, 5, 5] on this latest version.

Here's my attempt (the sorting works correctly, but arg (which should hold the output of argsort) does not. I need to sort just once to be efficient, so its an indirect sort operation.

I've tried quite a few things but seem to be going in circles at this point. I know its possible but I can't seem to find any reference implementations even in other languages.

This will eventually run in Numba CUDA compiled code, so I can't just use Numpy's argsort, and I can't use Python's fancy language constructs like list comprehensions. Has to be the caveman way :)

Any help is greatly appreciated!

Solution

I fixed it. Here is a working, highly performant arg sort implementation that doesn't use recursion and can be compiled to Numba CUDA:

import numpy as np

def argSortPartition(arr, arg, l, h):
    i = l - 1
    x = arr[h]

    for j in prange(l, h):
        if arr[j] <= x:
            # increment index of smaller element
            i = i + 1
            arr[i], arr[j] = arr[j], arr[i]
            arg[i], arg[j] = arg[j], arg[i]

    i += 1
    arr[i], arr[h] = arr[h], arr[i]
    arg[i], arg[h] = arg[h], arg[i]
    return i


def quickArgSortIterative(arr, start_index, end_index):
    # Create an auxiliary stack
    size = end_index - start_index + 1
    stack = [0] * size
    arg = list(range(size))

    # initialize top of stack
    top = -1

    # push initial values of l and h to stack
    top = top + 1
    stack[top] = start_index
    top = top + 1
    stack[top] = end_index

    # Keep popping from stack while is not empty
    while top >= 0:
        # Pop h and l
        end_index = stack[top]
        top = top - 1
        start_index = stack[top]
        top = top - 1

        # Set pivot element at its correct position in
        # sorted array
        p = argSortPartition(arr, arg, start_index, end_index)

        # If there are elements on left side of pivot,
        # then push left side to stack
        if p - 1 > start_index:
            top = top + 1
            stack[top] = start_index
            top = top + 1
            stack[top] = p - 1

        # If there are elements on right side of pivot,
        # then push right side to stack
        if p + 1 < end_index:
            top = top + 1
            stack[top] = p + 1
            top = top + 1
            stack[top] = end_index

    # must uncomment this loop if returning rankings instead of argsort
    # for i in range(len(arg)):
    #    stack[arg[i]] = i
    
    # to return rankings instead, return stack
    return arg

d = np.array([50, 30, 20, 40, 60])
argsorted = quickArgSortIterative(d, 0, len(d) - 1)
print(argsorted)

This was a bit complicated - hope it helps someone.