Search code examples
pandasargmax

Find the column index which has the maximum value for each row


I have below data frame:

   Name1 Scr1 Name2 Scr2 Name3 Scr3
   NY    21   CA    45   SF    37
   AZ    31   BK    46   AK    23

I am trying to get the maximum value of each row and corresponding name for each row:

df.idxmax(axis=1)

But how do i get the corresponding name as well?

Expected Output:

   Name Hi_Scr
   CA    45
   BK    46

Solution

  • Use:

    • filter columns with Scr by filter, convert values to numpy array by values
    • get indices of max values with argmax
    • filter columns with Name and select by indexing
    • get max values of numeric
    • create DataFrame by constructor

    a = df.filter(like='Scr').values
    b = a.argmax(axis=1)
    c = df.filter(like='Name').values[np.arange(len(df.index)), b]
    d = a.max(axis=1)
    
    df = pd.DataFrame({'Name':c, 'Hi_Scr':d}, columns=['Name','Hi_Scr'])
    print (df)
      Name  Hi_Scr
    0   CA      45
    1   BK      46
    

    Pandas solution is very similar - create MultiIndex in columns by extract, then select by xs and for looking values use lookup:

    a = df.columns.to_series().str.extract('(\D+)(\d+)', expand=False)
    df.columns = pd.MultiIndex.from_tuples(a.values.tolist())
    
    a = df.xs('Scr', axis=1)
    b = a.idxmax(axis=1)
    c = df.xs('Name', axis=1).lookup(df.index, b)
    d = a.max(axis=1)
    
    df = pd.DataFrame({'Name':c, 'Hi_Scr':d}, columns=['Name','Hi_Scr'])
    print (df)
      Name  Hi_Scr
    0   CA      45
    1   BK      46
    

    Timings:

    df = pd.concat([df]*10000).reset_index(drop=True)
    
    
    def jez2(df):
        a = df.columns.to_series().str.extract('(\D+)(\d+)', expand=False)
        df.columns = pd.MultiIndex.from_tuples(a.values.tolist())
    
        a = df.xs('Scr', axis=1)
        b = a.idxmax(axis=1)
        c = df.xs('Name', axis=1).lookup(df.index, b)
        d = a.max(axis=1)
    
        return pd.DataFrame({'Name':c, 'Hi_Scr':d}, columns=['Name','Hi_Scr'])
    
    
    def jez1(df):
        a = df.filter(like='Scr').values
        b = a.argmax(axis=1)
        c = df.filter(like='Name').values[np.arange(len(df.index)), b]
        d = a.max(axis=1)
    
        return  pd.DataFrame({'Name':c, 'Hi_Scr':d}, columns=['Name','Hi_Scr'])
    
    def dark(df):
        df['id'] = df.index
        ndf = pd.wide_to_long(df, ["Name", "Scr"], i="id", j="number").reset_index(0).set_index('Name')
        return ndf.groupby('id')['Scr'].agg(['max','idxmax']).rename(columns= {'max':'Hi_Scr','idxmax':'Name'})
    

    import time
    
    t0 = time.time()
    print (jez1(df).head())
    t1 = time.time() - t0
    print (t1)
    print (dark(df).head())
    t2 = time.time() - t1
    print (t2)
    print (jez2(df).head())
    t3 = time.time() - t2
    print (t3)
    
      Name  Hi_Scr
    0   CA      45
    1   BK      46
    2   CA      45
    3   BK      46
    4   CA      45
    #jez1 solution
    0.015599966049194336
        Hi_Scr Name
    id             
    0       45   CA
    1       46   BK
    2       45   CA
    3       46   BK
    4       45   CA
    #dark solution
    1515070100.961423
      Name  Hi_Scr
    0   CA      45
    1   BK      46
    2   CA      45
    3   BK      46
    4   CA      45
    #jez2 solution
    0.04679989814758301