Search code examples
rdplyrna

How to replace NaN and/or Inf with NA in a pipe chain in R


I'm wondering how to replace all Inf and NaN elements in numeric columns in my OUTPUT with NA in a pipe chain?

I tried the following without success: OUTPUT %>% replace(is.infinite(.)|!is.numeric(.), NA)

library(tidyverse)

DATA <- structure(list(InstID = c(2063L, 2063L, 2063L, 2063L, 2113L, 
                                  2113L, 2113L, 2113L, 2440L, 2441L, 2441L, 2441L), 
                       InstNm = c("Adel SD 21", "Adel SD 21", "Adel SD 21", "Adel SD 21", "Adrian SD 61", 
                                  "Adrian SD 61","Adrian SD 61", "Adrian SD 61", "Alabama Department of Education", 
                                  "Alaska Department of Education", "Alaska Department of Education", 
                                  "Alaska Department of Education"),
                       SchlYr = c(2021L, 2122L, 2223L,2324L, 2021L, 2122L, 2223L, 
                                  2324L, 2324L, 2021L, 2122L, 2223L), 
                       Total_Enrollment = c(17L, 13L, 11L, 13L, 262L, 257L, 264L, 
                                          275L, 1L, 3L, 4L, 7L), 
                       Total_EL = c(0L, 0L, 0L, 0L, 25L, 24L, 25L, 24L, 0L, 0L, 0L, 0L), 
                       Total_Arriver = c(0L, 0L, 0L, 0L,0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L)), 
                  row.names = c(NA, 12L), class = "data.frame")


OUTPUT <- DATA %>% group_by(InstNm) %>%
     mutate(Total_Enroll_change = round(((Total_Enrollment - lag(Total_Enrollment))/ lag(Total_Enrollment))*100, 1),
                       Total_EL_change = round(((Total_EL - lag(Total_EL))/ lag(Total_EL))*100,1),
                       Total_Arriver_change = round(((Total_Arriver - lag(Total_Arriver))/ lag(Total_Arriver))*100, 1)) 

Solution

  • Using dplyr::across and an if_else you could do:

    library(dplyr, warn = FALSE)
    
    DATA |>
      mutate(
        Total_Enroll_change = round(((Total_Enrollment - lag(Total_Enrollment)) / lag(Total_Enrollment)) * 100, 1),
        Total_EL_change = round(((Total_EL - lag(Total_EL)) / lag(Total_EL)) * 100, 1),
        Total_Arriver_change = round(((Total_Arriver - lag(Total_Arriver)) / lag(Total_Arriver)) * 100, 1),
        .by = InstNm
      ) |>
      mutate(
        across(
          ends_with("_change"),
          ~ if_else(is.nan(.x) | is.infinite(.x), NA, .x)
        )
      )
    #>    InstID                          InstNm SchlYr Total_Enrollment Total_EL
    #> 1    2063                      Adel SD 21   2021               17        0
    #> 2    2063                      Adel SD 21   2122               13        0
    #> 3    2063                      Adel SD 21   2223               11        0
    #> 4    2063                      Adel SD 21   2324               13        0
    #> 5    2113                    Adrian SD 61   2021              262       25
    #> 6    2113                    Adrian SD 61   2122              257       24
    #> 7    2113                    Adrian SD 61   2223              264       25
    #> 8    2113                    Adrian SD 61   2324              275       24
    #> 9    2440 Alabama Department of Education   2324                1        0
    #> 10   2441  Alaska Department of Education   2021                3        0
    #> 11   2441  Alaska Department of Education   2122                4        0
    #> 12   2441  Alaska Department of Education   2223                7        0
    #>    Total_Arriver Total_Enroll_change Total_EL_change Total_Arriver_change
    #> 1              0                  NA              NA                   NA
    #> 2              0               -23.5              NA                   NA
    #> 3              0               -15.4              NA                   NA
    #> 4              0                18.2              NA                   NA
    #> 5              0                  NA              NA                   NA
    #> 6              0                -1.9            -4.0                   NA
    #> 7              1                 2.7             4.2                   NA
    #> 8              0                 4.2            -4.0                 -100
    #> 9              0                  NA              NA                   NA
    #> 10             0                  NA              NA                   NA
    #> 11             0                33.3              NA                   NA
    #> 12             0                75.0              NA                   NA