Search code examples
python numpy casting

What is the fastest way to map a NumPy array of unsigned integers {0,1} to floats {1,-1}?


I have a numpy array of np.uint64 holding only 0 or 1 values, and I have to map 0 to np.float64(1.0), and 1 to np.float64(-1.0).

Since the interpreter doesn't know that it only has to convert 0 and 1, it uses a costly general algorithm. I therefore tried storing the two results in an array and using the uint64 values directly as indices into it, avoiding any conversion, but that is even slower.

import numpy as np
import timeit


# Benchmark input: 10000 values drawn from {0, 1} (the upper bound 2 is
# exclusive), stored as unsigned 64-bit integers.
random_bit = np.random.randint(low=0, high=2, size=10000, dtype=np.uint64)


def np_cast(random_bit):
    """Map bits to signs (0 -> 1.0, 1 -> -1.0) after an explicit float64 cast.

    The input is first converted with ``np.float64(...)``; the affine map
    ``1 - 2 * x`` is then applied element-wise to the converted values.
    """
    bits_as_float = np.float64(random_bit)
    return 1.0 - 2.0 * bits_as_float


def product(random_bit):
    """Map bits to signs (0 -> 1.0, 1 -> -1.0) with plain arithmetic.

    No explicit cast is written here: the input is multiplied by a float
    literal and the product is subtracted from 1.0.
    """
    doubled = 2.0 * random_bit
    return 1.0 - doubled


# Two-entry float64 lookup table: index 0 holds 1.0 and index 1 holds -1.0.
np_one_minus_one = np.array([1.0, -1.0], dtype=np.float64)


def _array(random_bit):
    """Map bits to signs by using each bit as an index into the lookup table.

    No arithmetic is done on the input; every element of ``random_bit``
    selects the matching entry of ``np_one_minus_one``.
    """
    return np_one_minus_one[random_bit]


# float64 scalars for the affine map 1 - 2 * bit, created once at module level
# instead of on every call.
one, minus_two = np.float64(1), np.float64(-2)


def astype(random_bit):
    """Map bits to signs (0 -> 1.0, 1 -> -1.0) via ``ndarray.astype``.

    The input is converted to float64 with ``astype`` and then combined with
    the module-level float64 scalars ``one`` and ``minus_two``.
    """
    bits_as_float = random_bit.astype(np.float64)
    return one + minus_two * bits_as_float


# Candidates to benchmark; timings are printed in this order.
function_list = [np_cast, product, _array, astype]

print("start benchmark")
for function in function_list:
    # Wrap the call in a zero-argument lambda so timeit can invoke it on the
    # shared module-level input array.
    timer = timeit.Timer(lambda: function(random_bit))
    _time = timer.timeit(number=100_000)
    print(f"{function.__name__}: {_time:.3f} seconds")

I get these times:

np_cast: 178.604 seconds
product: 172.939 seconds
_array: 239.305 seconds
astype: 186.031 seconds

Solution

  • You can do this ~4x faster by using numba. For the general N-d case this could be:

    import numba as nb
    
    # Scalar kernel for the sign mapping, written as a branch:
    # a truthy input yields -1.0, a falsy input yields 1.0.
    # Decorated with nb.vectorize so it can be applied to whole arrays.
    @nb.vectorize
    def numba_if(random_bit):
        return -1.0 if random_bit else 1.0
    
    # Scalar kernel for the sign mapping, written as arithmetic:
    # computes 1.0 - 2.0 * x, so 0 maps to 1.0 and 1 maps to -1.0.
    # Decorated with nb.vectorize so it can be applied to whole arrays.
    @nb.vectorize
    def numba_product(random_bit):
        return 1.0 - 2.0 * random_bit
    

    Or, in your specific 1-D case, you can use explicit loops to make it faster:

    import numpy as np
    
    # 1-D variant compiled with nb.njit: fills a float64 result array in an
    # explicit loop, choosing each output value with a branch.
    @nb.njit
    def numba_if_loop(random_bit):
        # Only one-dimensional input is accepted; the flat index loop below
        # addresses elements with a single index.
        assert random_bit.ndim == 1
        # Uninitialised output with the input's shape but float64 elements.
        result = np.empty_like(random_bit, dtype=np.float64)
        for i in range(random_bit.size):
            # Truthy element -> -1.0, falsy element -> 1.0.
            result[i] = -1.0 if random_bit[i] else 1.0
        return result
    
    # 1-D variant compiled with nb.njit: fills a float64 result array in an
    # explicit loop, computing each output value arithmetically.
    @nb.njit
    def numba_product_loop(random_bit):
        # Only one-dimensional input is accepted; the flat index loop below
        # addresses elements with a single index.
        assert random_bit.ndim == 1
        # Uninitialised output with the input's shape but float64 elements.
        result = np.empty_like(random_bit, dtype=np.float64)
        for i in range(random_bit.size):
            # 1.0 - 2.0 * x: an element of 0 gives 1.0, an element of 1 gives -1.0.
            result[i] = 1.0 - 2.0 * random_bit[i]
        return result
    

    Timings (mason is lambda x:(1-2*x.astype(np.int8)).astype(float) from the comments):

    # NumPy baselines: the four functions from the question, plus `mason`.
    %timeit np_cast(random_bit)
    %timeit product(random_bit)
    %timeit _array(random_bit)
    %timeit astype(random_bit)
    %timeit mason(random_bit)
    
    # Check that every numba variant reproduces np_cast's output exactly.
    # NOTE(review): these calls also run each numba function once before it is
    # timed, presumably so JIT compilation is not part of the measurement; confirm.
    assert np.array_equal(np_cast(random_bit), numba_if(random_bit))
    assert np.array_equal(np_cast(random_bit), numba_product(random_bit))
    assert np.array_equal(np_cast(random_bit), numba_if_loop(random_bit))
    assert np.array_equal(np_cast(random_bit), numba_product_loop(random_bit))
    
    # Numba variants, timed after the equality checks.
    %timeit numba_if(random_bit)
    %timeit numba_product(random_bit)
    %timeit numba_if_loop(random_bit)
    %timeit numba_product_loop(random_bit)
    

    Output:

    6.58 µs ± 218 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
    7.58 µs ± 251 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
    11 µs ± 9.34 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
    7.32 µs ± 674 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
    6.86 µs ± 153 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
    1.89 µs ± 25.8 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
    2.07 µs ± 13.1 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
    1.6 µs ± 14.7 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
    1.78 µs ± 5.31 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)