Search code examples
pythonnumpyfunctional-programmingmedianargmax

find index of largest difference from median with numpy


I'm trying to find the index number of the outlier number. based on difference from median I'm able to get the correct high number, but whenever the low number is the outlier I only get the high number..

import numpy as np

def findoutlier(lis):

  outliermax = np.absolute(np.max(lis) - np.median(lis))
  outliermin = np.absolute(np.min(lis) - np.median(lis))
  if outliermax > outliermin:
     argmax = np.argmax(lis, axis = 1)
     return argmax
  else:
     argmin = np.argmin(lis, axis = 1)
     return argmin

def main():
  Matx = np.array([[10,3,2],[1,2,6]])   
  print(findoutlier(Matx))

  threeMatx = np.array([[1,10,2,8,5],[2,7,3,9,11],[19,2,1,1,5]])
  print(findoutlier(threeMatx))

main()

Solution

  • You need to specify the axis when using median, max and min:

    import numpy as np
    
    
    def findoutlier(lis):
        omaxs = np.absolute(np.max(lis, axis=1) - np.median(lis, axis=1))
        omins = np.absolute(np.min(lis, axis=1) - np.median(lis, axis=1))
    
        return [np.argmax(l) if omax > omin else np.argmin(l)  for omax, omin, l in  zip(omaxs, omins, lis)]
    
    
    def main():
        mat_x = np.array([[10, 3, 2], [1, 2, 6]])
        print(findoutlier(mat_x))
    
        three_mat_x = np.array([[1, 10, 2, 8, 5], [2, 7, 3, 9, 11], [19, 2, 1, 1, 5]])
        print(findoutlier(three_mat_x))
    

    Output

    [0, 2]
    [1, 0, 0]
    

    UPDATE

    As mentioned by @user3483203 you can use numpy.where:

    import numpy as np
    
    
    def findoutlier(lis):
        omaxs = np.absolute(np.max(lis, axis=1) - np.median(lis, axis=1))
        omins = np.absolute(np.min(lis, axis=1) - np.median(lis, axis=1))
    
        return np.where(omaxs > omins, np.argmax(lis, axis=1), np.argmin(lis, axis=1))
    
    
    def main():
        mat_x = np.array([[10, 3, 2], [1, 2, 6]])
        print(findoutlier(mat_x))
    
        three_mat_x = np.array([[1, 10, 2, 8, 5], [2, 7, 3, 9, 11], [19, 2, 1, 1, 5]])
        print(findoutlier(three_mat_x))
    
    main()
    

    Output

    [0 2]
    [1 0 0]