Find unique rows in numpy.array
What is a faster way to get the locations of unique rows in a NumPy array?
Here is one solution:
import numpy as np
# Goal: for the rows that first appear in A, find how many times each
# one also shows up in B.
A = np.array([[1, 0, 1], [0, 0, 0], [1, 1, 0]])
B = np.array([[0, 0, 0], [1, 0, 1], [0, 0, 0], [1, 1, 1], [1, 1, 0]])

# Stack A on top of B. A must come first so that every row index below
# len(A) refers to a row of A.
combined = np.vstack((A, B))

# Row-wise uniqueness: the distinct rows, the index of each row's first
# occurrence in `combined`, and how often each row occurs overall.
unique, unique_indices, unique_counts = np.unique(
    combined, axis=0, return_index=True, return_counts=True
)
for _arr in (unique, unique_indices, unique_counts):
    print(_arr)

# A first-occurrence index smaller than the number of rows in A means the
# row appears first (or only) in A.
n_rows_in_A = len(A)
indices_A = np.flatnonzero(unique_indices < n_rows_in_A)
print(indices_A)

# Restrict the first-occurrence indices and the counts to those rows.
unique_indices_A = unique_indices.take(indices_A)
unique_counts_A = unique_counts.take(indices_A)
print(unique_indices_A)
print(unique_counts_A)

# Each count still includes the occurrence inside A itself, which is not
# of interest, so remove it.
# NOTE(review): subtracting exactly one presumes each row occurs only
# once in A -- confirm for inputs with duplicate rows in A.
unique_counts_A = unique_counts_A - 1
print(unique_indices_A)
print(unique_counts_A)

# This is nearly the desired result.
# TODO: sort it and account for rows that appear in B but not in A.