I'm trying to find the index of the outlier in each row of a 2-D array, where the outlier is the value that differs most from the median. When the high number is the outlier I get the correct index, but when the low number is the outlier I still get the index of the high number.
import numpy as np
def findoutlier(lis):
    """Return, per row, the index of the max or of the min of ``lis``.

    The max/min choice is made ONCE for the whole array: the distances of
    the global maximum and the global minimum from the global median are
    compared, and then either ``argmax`` or ``argmin`` is applied to every
    row.

    NOTE: this is the behaviour the question is about.  Because
    ``np.max``, ``np.min`` and ``np.median`` are called without ``axis``,
    they reduce over the flattened array, so every row gets the same
    decision -- a row whose own outlier is its low value still reports the
    index of its high value whenever the global maximum lies farther from
    the global median than the global minimum does.
    """
    # No ``axis`` argument: this is the median of all elements, not per row.
    overall_median = np.median(lis)
    outliermax = np.absolute(np.max(lis) - overall_median)
    outliermin = np.absolute(np.min(lis) - overall_median)
    if outliermax > outliermin:
        return np.argmax(lis, axis=1)
    return np.argmin(lis, axis=1)
def main():
    """Print the result of ``findoutlier`` for a 2x3 and a 3x5 example."""
    mat_x = np.array([[10, 3, 2], [1, 2, 6]])
    print(findoutlier(mat_x))
    three_mat_x = np.array([[1, 10, 2, 8, 5], [2, 7, 3, 9, 11], [19, 2, 1, 1, 5]])
    print(findoutlier(three_mat_x))


# Run the demo only when executed as a script, not on import.
if __name__ == "__main__":
    main()
You need to specify the axis when using median, max and min:
import numpy as np
def findoutlier(lis):
    """Return the index of the outlier in each row of a 2-D array.

    For every row, the outlier is whichever of the row's maximum or
    minimum lies farther from the row's median.  On a tie the index of
    the minimum is returned.

    Args:
        lis: 2-D array-like of numbers, one candidate set per row.

    Returns:
        A list of plain ``int`` column indices, one per row.
    """
    rows = np.asarray(lis)
    # ``axis=1`` reduces along each row, so every row gets its own
    # median / max / min instead of one value for the whole array.
    medians = np.median(rows, axis=1)
    omaxs = np.absolute(np.max(rows, axis=1) - medians)
    omins = np.absolute(np.min(rows, axis=1) - medians)
    # ``int(...)`` turns NumPy integer scalars into Python ints so the
    # list prints as ``[0, 2]`` regardless of NumPy's scalar repr.
    return [
        int(np.argmax(row)) if omax > omin else int(np.argmin(row))
        for omax, omin, row in zip(omaxs, omins, rows)
    ]
def main():
    """Print the per-row outlier indices for a 2x3 and a 3x5 example."""
    mat_x = np.array([[10, 3, 2], [1, 2, 6]])
    print(findoutlier(mat_x))
    three_mat_x = np.array([[1, 10, 2, 8, 5], [2, 7, 3, 9, 11], [19, 2, 1, 1, 5]])
    print(findoutlier(three_mat_x))


# The snippet shows output but never invoked main(); call it when run
# as a script so the output below is actually produced.
if __name__ == "__main__":
    main()
Output
[0, 2]
[1, 0, 0]
UPDATE
As mentioned by @user3483203 you can use numpy.where:
import numpy as np
def findoutlier(lis):
    """Return the index of the outlier in each row of a 2-D array.

    For every row, the outlier is whichever of the row's maximum or
    minimum lies farther from the row's median.  On a tie the index of
    the minimum is returned.

    Args:
        lis: 2-D array-like of numbers, one candidate set per row.

    Returns:
        A 1-D NumPy integer array with one column index per row.
    """
    rows = np.asarray(lis)
    # ``axis=1`` reduces along each row, so every row gets its own
    # median / max / min instead of one value for the whole array.
    medians = np.median(rows, axis=1)
    omaxs = np.absolute(np.max(rows, axis=1) - medians)
    omins = np.absolute(np.min(rows, axis=1) - medians)
    # Element-wise select: argmax where the max is the farther value,
    # argmin otherwise.
    return np.where(omaxs > omins, np.argmax(rows, axis=1), np.argmin(rows, axis=1))
def main():
    """Print the per-row outlier indices for a 2x3 and a 3x5 example."""
    mat_x = np.array([[10, 3, 2], [1, 2, 6]])
    print(findoutlier(mat_x))
    three_mat_x = np.array([[1, 10, 2, 8, 5], [2, 7, 3, 9, 11], [19, 2, 1, 1, 5]])
    print(findoutlier(three_mat_x))


# Run the demo only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Output
[0 2]
[1 0 0]