Search code examples
rdataframedplyrnatidy

Changing Values in Dataframe based on Specific Columns


I currently have a consisting of lower and upper bounds of confidence intervals and point estimates. I want to create a new dataframe so I can plot the variables where point estimates are not equal to zero. For each value of n, is it possible to change the values of the upper and lower bounds to if the point estimate value is 0? For example, in the dataframe provided when n = 205, y_pe = 0 so I would like to change the corresponding y_lo and y_up to NA.

X1_lo <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
X1_up <-c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
X1_pe <-c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
x_lo <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
x_up <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
x_pe <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
y_lo <- c(-24.71177, -25.13779, -16.19142, -15.63819 ,-15.42051 ,-16.11342, -17.10603 ,-18.00848 ,-19.59877, -12.91438)
y_up <- c(14.074116, 14.051209, 13.417954, 12.187319 ,13.602022, 12.943939,  1.317839 ,11.891103, 15.165398,  1.365459)
y_pe <- c(-2.984101, -2.867680, -2.695838 ,-2.583140, -2.416878 , 0.000000,  0.000000 , 0.000000 , 0.000000,  0.000000)


test.df <- cbind(n, X1_lo, X1_up, x_lo, x_up, y_lo, y_up, X1_pe, x_pe, y_pe) 

Thanks in advance.


Solution

  • Specify a logical vector in i, and j with the column names, then assign the columns where the 'y_pe' is 0 to NA

    test.df[test.df[, "y_pe"] == 0, c("y_lo", "y_up")] <- NA
    

    If the data is data.frame and wants to be applied to all sets of columns except the first column ('n'), then we can split into a group of data.frames and do the replacement

    nm1 <- sub("_.*", "", colnames(test.df)[-1])
    out <- do.call(cbind, unname(lapply(split.default(test.df[-1], nm1),
           function(x) {
             i1 <- endsWith(names(x), "pe")
             x[!x[,i1], !i1] <- NA
            x})))
    out
    #   x_lo x_up x_pe X1_lo X1_up X1_pe      y_lo     y_up      y_pe
    #1    NA   NA    0    NA    NA     0 -24.71177 14.07412 -2.984101
    #2    NA   NA    0    NA    NA     0 -25.13779 14.05121 -2.867680
    #3    NA   NA    0    NA    NA     0 -16.19142 13.41795 -2.695838
    #4    NA   NA    0    NA    NA     0 -15.63819 12.18732 -2.583140
    #5    NA   NA    0    NA    NA     0 -15.42051 13.60202 -2.416878
    #6    NA   NA    0    NA    NA     0        NA       NA  0.000000
    #7    NA   NA    0    NA    NA     0        NA       NA  0.000000
    #8    NA   NA    0    NA    NA     0        NA       NA  0.000000
    #9    NA   NA    0    NA    NA     0        NA       NA  0.000000
    #10   NA   NA    0    NA    NA     0        NA       NA  0.000000
    
    test.df[names(out)] <-  out
    test.df
    #     n X1_lo X1_up x_lo x_up      y_lo     y_up X1_pe x_pe      y_pe
    #1  205    NA    NA   NA   NA -24.71177 14.07412     0    0 -2.984101
    #2  205    NA    NA   NA   NA -25.13779 14.05121     0    0 -2.867680
    #3  205    NA    NA   NA   NA -16.19142 13.41795     0    0 -2.695838
    #4  205    NA    NA   NA   NA -15.63819 12.18732     0    0 -2.583140
    #5  205    NA    NA   NA   NA -15.42051 13.60202     0    0 -2.416878
    #6  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
    #7  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
    #8  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
    #9  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
    #10 205    NA    NA   NA   NA        NA       NA     0    0  0.000000
    

    Or using tidyverse

    library(dplyr)
    library(tidyr)
    library(stringr)
    test.df %>%
       mutate(rn = row_number()) %>% 
       pivot_longer(cols = -c(n, rn), names_sep="_", 
            names_to = c("group", ".value")) %>%
       mutate_at(vars(lo, up), ~ replace(., pe == 0, NA)) %>%
       pivot_wider(names_from = "group", values_from = c('lo', 'up', 'pe'),
              names_repair = ~ str_replace(., "(.*)_(.*)", "\\2_\\1")) %>%
       select(-rn)
    # A tibble: 10 x 10
    #       n X1_lo  x_lo  y_lo X1_pe  x_pe  y_pe X1_up  x_up  y_up
    #   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
    # 1   205    NA    NA -24.7     0     0 -2.98    NA    NA  14.1
    # 2   205    NA    NA -25.1     0     0 -2.87    NA    NA  14.1
    # 3   205    NA    NA -16.2     0     0 -2.70    NA    NA  13.4
    # 4   205    NA    NA -15.6     0     0 -2.58    NA    NA  12.2
    # 5   205    NA    NA -15.4     0     0 -2.42    NA    NA  13.6
    # 6   205    NA    NA  NA       0     0  0       NA    NA  NA  
    # 7   205    NA    NA  NA       0     0  0       NA    NA  NA  
    # 8   205    NA    NA  NA       0     0  0       NA    NA  NA  
    # 9   205    NA    NA  NA       0     0  0       NA    NA  NA  
    #10   205    NA    NA  NA       0     0  0       NA    NA  NA  
    

    data

    test.df <- data.frame(n, X1_lo, X1_up, x_lo, x_up, y_lo, y_up, X1_pe, x_pe, y_pe)