Search code examples
r dplyr conditional-statements max na

R conditional max function with NA values


I would like to write R code that loops through a data frame and populates a new column, "Winner", based on the maximum value across 5 columns in each row. The code runs successfully when all NA values are changed to 0s, but fails when the NAs are left in place. I tried na.rm = TRUE, but to no avail.

  ## Body of a per-row loop over `df`. The loop header that defines `row` is
  ## not part of this excerpt; only its closing brace appears at the end.
  ## Read the five candidates' percentages for the current row.
  Can1 <- df[row, "Candidate1_percent"]
  Can2  <- df[row, "Candidate2_percent"]
  Can3 <- df[row, "Candidate3_percent"]
  Can4 <- df[row, "Candidate4_percent"]
  Can5  <- df[row, "Candidate5_percent"]

  ## Copy into "Winner" the name of the first candidate whose percentage
  ## equals the row maximum. max() is called without na.rm here, so a single
  ## NA among the five values makes the maximum NA; the comparison is then NA
  ## as well, which if() cannot use as a condition.
  if(max(Can1,Can2,Can3,Can4,Can5) == Can1) {
    df[row, "Winner"] = df[row,"Candidate1_name"]
  } else if(max(Can1,Can2,Can3,Can4,Can5) == Can2) {
    df[row, "Winner"] = df[row,"Candidate2_name"]
  } else if(max(Can1,Can2,Can3,Can4,Can5) == Can3) {
    df[row, "Winner"] = df[row,"Candidate3_name"]
  } else if(max(Can1,Can2,Can3,Can4,Can5) == Can4) {
    df[row, "Winner"] = df[row,"Candidate4_name"]
  } else if(max(Can1,Can2,Can3,Can4,Can5) == Can5) {
    df[row, "Winner"] = df[row,"Candidate5_name"]
  }
}

Can anyone help?


Solution

  • Hard to reproduce this without more information. However, does the following work for you?

    ## Example data
    test <- data.frame(
      Candidate1_percent = c(10, 20, 30, NA, 50),
      Candidate1_name = rep("One", 5),
      Candidate2_percent = c(5, 50, NA, 10, 30),
      Candidate2_name = rep("Two", 5),
      Candidate3_percent = c(40, 40, NA, 30, 25),
      Candidate3_name = rep("Three", 5),
      Candidate4_percent = c(90, 10, 1, 10, 80),
      Candidate4_name = rep("Four", 5),
      Candidate5_percent = c(44, 13, 82, 27, 12),
      Candidate5_name = rep("Five", 5)
    )

    ## Name of the top-scoring candidate for every row of `df`.
    ##
    ## df       : data frame with paired "<prefix>_percent" / "<prefix>_name"
    ##            columns.
    ## pct_cols : indices (or names) of the percent columns; defaults to every
    ##            column whose name ends in "_percent".
    ##
    ## Returns one winner name per row. A row whose percentages are all NA
    ## gets NA; on a tie the first (left-most) tied candidate wins.
    find_winner <- function(df, pct_cols = grep("_percent$", names(df))) {
      pct_names <- names(df[pct_cols])
      ## Pair columns by name rather than by position, so the result does not
      ## depend on each name column sitting right after its percent column.
      name_cols <- sub("_percent$", "_name", pct_names)
      stopifnot(length(pct_names) > 0L, all(name_cols %in% names(df)))

      pct <- as.matrix(df[pct_names])
      rows <- seq_len(nrow(df))
      ## which.max() skips NAs and returns integer(0) when nothing is left;
      ## that case is mapped to NA so the lookup below yields NA for the row.
      best <- vapply(
        rows,
        function(i) {
          idx <- which.max(pct[i, ])
          if (length(idx) == 0L) NA_integer_ else idx[[1L]]
        },
        integer(1)
      )
      ## Matrix indexing by (row, column) pairs; an NA column index gives NA.
      as.matrix(df[name_cols])[cbind(rows, best)]
    }

    ## Columns holding the percentages, and the maximum value in each row.
    ## pmax() with na.rm = TRUE ignores NAs and returns NA (without a warning)
    ## for a row that has no non-missing value at all.
    pccols <- grep("_percent$", names(test))
    maximum <- do.call(pmax, c(unname(as.list(test[pccols])), na.rm = TRUE))

    ## Find who had the maximum and add their name
    test$Winner <- find_winner(test, pccols)

    test
    

    If you must keep the loop as you presented it, then you need to check each value for NA before comparing it, to avoid the error:

    ## Example data; the last row is entirely NA to exercise that edge case
    df <- data.frame(
      Candidate1_percent = c(10, 20, 30, NA, 50, NA),
      Candidate1_name = rep("One", 6),
      Candidate2_percent = c(5, 50, NA, 10, 30, NA),
      Candidate2_name = rep("Two", 6),
      Candidate3_percent = c(40, 40, NA, 30, 25, NA),
      Candidate3_name = rep("Three", 6),
      Candidate4_percent = c(90, 10, 1, 10, 80, NA),
      Candidate4_name = rep("Four", 6),
      Candidate5_percent = c(44, 13, 82, 27, 12, NA),
      Candidate5_name = rep("Five", 6)
    )

    ## Create the column up front so every row has a defined value (NA when
    ## no winner can be determined) instead of relying on the first match in
    ## the loop to create it.
    df$Winner <- NA_character_

    ## seq_len() keeps the loop from running on an empty data frame
    for (row in seq_len(nrow(df))) {
      Can1 <- df[row, "Candidate1_percent"]
      Can2 <- df[row, "Candidate2_percent"]
      Can3 <- df[row, "Candidate3_percent"]
      Can4 <- df[row, "Candidate4_percent"]
      Can5 <- df[row, "Candidate5_percent"]

      ## With nothing but NAs, max(..., na.rm = TRUE) would return -Inf and
      ## emit a warning; leave Winner as NA for such a row instead.
      pcts <- c(Can1, Can2, Can3, Can4, Can5)
      if (all(is.na(pcts))) {
        next
      }

      tempMax <- max(pcts, na.rm = TRUE)
      ## `&&` short-circuits, so the comparison is only evaluated for a
      ## non-missing value and the if() condition can never be NA.
      ## On a tie the first matching candidate wins.
      if (!is.na(Can1) && tempMax == Can1) {
        df[row, "Winner"] <- df[row, "Candidate1_name"]
      } else if (!is.na(Can2) && tempMax == Can2) {
        df[row, "Winner"] <- df[row, "Candidate2_name"]
      } else if (!is.na(Can3) && tempMax == Can3) {
        df[row, "Winner"] <- df[row, "Candidate3_name"]
      } else if (!is.na(Can4) && tempMax == Can4) {
        df[row, "Winner"] <- df[row, "Candidate4_name"]
      } else if (!is.na(Can5) && tempMax == Can5) {
        df[row, "Winner"] <- df[row, "Candidate5_name"]
      }
    }
    df