arrays numpy python-itertools numpy-indexed

Efficiently find full row permutations in 2d array having repeated row pairs

Consider the array:

import numpy as np
import numpy_indexed as npi
from itertools import permutations

arr = np.array([[1, 2, 3, 4],
                [3, 3, 3, 6],
                [2, 0, 0, 2],
                [2, 0, 0, 2],  
                [8, 2, 8, 2],
                [4, 5, 4, 5], 
                [3, 3, 3, 6],
                [4, 5, 4, 5],
                [0, 9, 8, 7],
                [1, 2, 3, 4]])

I need to find all unique row permutations. What I currently do is find all 10! row permutations, then use npi.unique. Like this:

arr_perms = np.array([arr[i, :] for i in permutations(range(len(arr)))])
u, index = npi.unique(arr_perms, return_index = True, axis=0)

This works as expected, but it seems expensive because of the nature of the arrays I am working with. These arrays all have at least one pair (usually several pairs) of identical rows.

In the small example shown, the 10 rows hold 4 identical row pairs, so the total number of unique row permutqtions is only 10!/2^4 = 226800, a big reduction.

QUESTION: Is there a way to efficiently find the unique row permutations without first having to find the full set of permutations?

Solution

This here might help you. It still creates all permutations, but it is 10 times faster than the code you posted, since each subarray of length 4 is converted to an int using bit shifting.

import math
import numpy as np
import numpy_indexed as npi


def generate_permutations_matrix(n, k):
    """
    Generate a matrix of permutations for given values of n and k.

    Parameters:
    - n (int): Total number of elements.
    - k (int): Length of each permutation.

    Returns:
    - np.ndarray: Matrix of permutations.
    """
    result_matrix = np.zeros((math.perm(n, k), k), np.uint8)
    f = 1
    for m in range(n - k + 1, n + 1):
        sub_matrix = result_matrix[:f, n - m + 1:]
        for i in range(1, m):
            result_matrix[i * f: (i + 1) * f, n - m] = i
            result_matrix[i * f: (i + 1) * f, n - m + 1:] = sub_matrix + (
                    sub_matrix >= i
            )
        sub_matrix += 1
        f *= m
    return result_matrix


def convert_to_integer(arr):
    """
    Convert a 2D array of integers to a single integer value.

    Parameters:
    - arr (np.ndarray): Input 2D array.

    Returns:
    - int: Integer representation of the array.
    """
    arr = arr.astype(np.uint64)
    integer_value = arr[..., 0].copy()
    for ival in range(1, arr.shape[1]):
        integer_value += arr[..., ival] << ival * 8
    return integer_value


def get_permutations(arr, len_of_each):
    """
    Get unique permutations of rows in a 2D array.

    Parameters:
    - arr (np.ndarray): Input 2D array.
    - len_of_each (int): Length of each permutation.

    Returns:
    - np.ndarray: Permutations matrix.
    """
    integer_value = convert_to_integer(arr)

    # Create dictionaries for mapping integer values to unique identifiers
    lookup_dict = {}
    lookup_dict2 = {}
    alpha1 = []
    identifier = 0

    # Iterate through integer values and create mappings
    for ini, x in enumerate(integer_value):
        if x in lookup_dict:
            value = lookup_dict.get(x)
            alpha1.append((value, x))
            lookup_dict2[ini] = value
        else:
            alpha1.append((identifier, x))
            lookup_dict[x] = identifier
            lookup_dict2[ini] = identifier
            identifier += 1

    # Generate permutations matrix
    permutations_matrix = generate_permutations_matrix(len(alpha1), len_of_each)

    # Map original identifiers back to permutations matrix
    cond_list = []
    choice_list = []
    for nom in lookup_dict2.items():
        cond_list.append(permutations_matrix == nom[0])
        choice_list.append(nom[1])

    choice_list = np.array(choice_list, dtype=np.uint8)
    unique_permutations = npi.unique(
        np.select(cond_list, choice_list, 0), return_index=False, axis=0
    )

    # Map back to original identifiers
    cond_list2 = []
    choice_list2 = []
    for nom in lookup_dict.items():
        cond_list2.append(unique_permutations == nom[1])
        choice_list2.append(nom[0])

    result = np.select(cond_list2, choice_list2, 0)

    # Convert back to the original 2D array format
    return np.stack(
        [
            np.dstack([(result[..., g] >> rx * 8) & 255 for rx in range(arr.shape[1])])
            for g in range(len_of_each)
        ],
        axis=2,
    ).squeeze()


# Example usage
arr = np.array(
    [
        [1, 2, 3, 4],
        [3, 3, 3, 6],
        [2, 0, 0, 2],
        [2, 0, 0, 2],
        [8, 2, 8, 2],
        [4, 5, 4, 5],
        [3, 3, 3, 6],
        [4, 5, 4, 5],
        [0, 9, 8, 7],
        [1, 2, 3, 4],
    ]
)
result_permutations = get_permutations(arr=arr, len_of_each=10)

from itertools import permutations


def test(arr):
    arr_perms = np.array([arr[i, :] for i in permutations(range(len(arr)))])
u, index = npi.unique(arr_perms, return_index=True, axis=0)
return u, index

%timeit result_permutations = get_permutations(arr=arr, len_of_each=10)
1.52 s ± 27.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit test(arr)
13.6 s ± 52.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)