I have a 2D NumPy array and want to collect the indices of identical rows using a list comprehension. My implementation returns the desired result, and a better solution can be found here:
import numpy as np
# Sample input data: a 9x6 matrix of 0/1 entries in which some rows repeat.
A = np.array(
    [
        [1, 1, 1, 0, 0, 0],
        [0, 0, 1, 0, 1, 1],
        [0, 0, 1, 0, 1, 1],
        [1, 1, 1, 0, 0, 0],
        [0, 0, 1, 0, 1, 1],
        [1, 0, 0, 0, 0, 0],
        [1, 0, 1, 0, 0, 0],
        [1, 0, 0, 0, 1, 0],
        [1, 0, 0, 0, 0, 0],
    ]
)
def gr_id_rows(Matrix):
    """Group the row indices of a 2-D array by identical row content.

    Parameters
    ----------
    Matrix : array_like
        Two-dimensional array (or nested sequence) whose rows are compared.

    Returns
    -------
    list of list of int
        One inner list per distinct row, holding the indices of every row
        equal to it in ascending order. Groups appear in the order in which
        their first row occurs in ``Matrix``. An empty input yields ``[]``.
    """
    rows = np.asarray(Matrix)
    # A dict keeps insertion order, so groups come out ordered by the first
    # occurrence of each distinct row, in a single pass over the rows instead
    # of repeatedly deleting rows from a copy of the array.
    groups = {}
    for idx, row in enumerate(rows):
        # A tuple of Python scalars compares element-wise like
        # np.array_equal: 0.0 matches -0.0, and rows containing NaN never
        # match another row.
        groups.setdefault(tuple(row.tolist()), []).append(idx)
    return list(groups.values())
# Execution: print the groups of identical row indices for the sample matrix A.
print( gr_id_rows(A) ) # expected output: [[0, 3], [1, 2, 4], [5, 8], [6], [7]]
Notes about the real dataset:
Can we do this elegantly by using list comprehension?
I made an attempt that (obviously) yields a wrong result.
# Attempt: for every row h, list the indices k != h of the rows equal to row h.
# Each group is therefore reported once per member, without the member itself,
# and unique rows produce an empty list -- not the desired grouping.
nbr_rows = A.shape[0]
col_ind = range(nbr_rows)
ind_eq = [
    [k for k in col_ind if k != h and np.array_equal(A[k], A[h])]
    for h in col_ind
]
print(ind_eq)  # [[3], [2, 4], [1, 4], [0], [1, 2], [8], [], [], [5]]
Here is a solution that uses numpy.equal to compare A against itself (via broadcasting) and itertools.groupby to reshape the output:
from itertools import groupby
# Compare every row with every other row via broadcasting; entry (i, j) of the
# reduced mask is True when rows i and j agree in all columns.
same_rows = (A == A[:, None, :]).all(axis=2)
# a holds the row index i and b the matching row index j of each True entry.
a, b = np.nonzero(same_rows)
# Consecutive pairs sharing the same i form one group of identical rows; the
# set collapses the copies of a group produced by each of its members.
{
    tuple(match for _, match in pairs)
    for _, pairs in groupby(zip(a, b), key=lambda pair: pair[0])
}
output:
{(0, 3), (1, 2, 4), (5, 8), (6,), (7,)}