I am trying to create the following loop:
Loop through columns_to_check (10 columns), if there is no value (i.e. NA) then NA. End loop. If not NA, then check the following conditions:
Create 10 new columns with the result of the loop and label them stp_result_ (value 1 to 10).
Here's my dataframe:
# Example data: one row per subject, ten trials per subject.
# For trial k there are three columns: stp_t_k (the trial value, NA when the
# trial was not run), stp_tk_cor and stp_tk_cor_num.
# structure() does not validate column lengths, so every vector below must
# have exactly 10 elements to agree with row.names = c(NA, -10L).
df <- structure(
  list(
    subject_id = c(
      "5467", "6784", "3457", "0987", "1245",
      "1945", "3468", "0012", "0823", "0812"
    ),
    stp_t_1 = c(1, 3, 5, 1, 2, 5, 4, 3, 3, 1),
    stp_t1_cor = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
    stp_t1_cor_num = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
    stp_t_2 = c(2, 5, 1, 3, 5, 1, 3, 2, 2, 3),
    stp_t2_cor = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0),
    stp_t2_cor_num = c(1, NA, NA, NA, NA, NA, NA, NA, NA, NA),
    stp_t_3 = c(3, 2, 5, 4, 3, 3, 3, 3, 1, 5),
    stp_t3_cor = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0),
    # Padded to 10 values (was 9); the 4 stays aligned with stp_t3_cor == 1.
    stp_t3_cor_num = c(NA, 4, NA, NA, NA, NA, NA, NA, NA, NA),
    stp_t_4 = c(4, 1, 4, 3, NA, NA, 1, 2, 5, NA),
    stp_t4_cor = c(1, 0, 0, 0, NA, NA, 0, 0, 0, 0),
    # Padded to 10 values (was 9); the 1 stays aligned with stp_t4_cor == 1.
    stp_t4_cor_num = c(1, NA, NA, NA, NA, NA, NA, NA, NA, NA),
    stp_t_5 = c(5, NA, 3, 1, NA, NA, 1, 3, NA, NA),
    stp_t5_cor = c(0, NA, 0, 0, NA, NA, 0, 0, NA, NA),
    stp_t5_cor_num = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
    stp_t_6 = c(NA, NA, NA, NA, NA, NA, 4, 4, NA, NA),
    stp_t6_cor = c(NA, NA, NA, NA, NA, NA, 0, 0, NA, NA),
    stp_t6_cor_num = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
    stp_t_7 = c(NA, NA, NA, NA, NA, NA, 5, 5, NA, NA),
    stp_t7_cor = c(NA, NA, NA, NA, NA, NA, 0, 0, NA, NA),
    stp_t7_cor_num = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
    stp_t_8 = c(NA, NA, NA, NA, NA, NA, 2, 1, NA, NA),
    stp_t8_cor = c(NA, NA, NA, NA, NA, NA, 0, 0, NA, NA),
    stp_t8_cor_num = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
    stp_t_9 = c(NA, NA, NA, NA, NA, NA, 1, 3, NA, NA),
    stp_t9_cor = c(NA, NA, NA, NA, NA, NA, 0, 0, NA, NA),
    stp_t9_cor_num = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
    stp_t_10 = c(NA, NA, NA, NA, NA, NA, 1, 2, NA, NA),
    stp_t10_cor = c(NA, NA, NA, NA, NA, NA, 0, 0, NA, NA),
    stp_t10_cor_num = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)
  ),
  class = "data.frame",
  row.names = c(NA, -10L)
)
Here's what I have tried:
# The asker's attempt: build one stp_result_<i> column per stp_t_<i> column.
# The pattern "stp_t_" selects stp_t_1 ... stp_t_10 only; names such as
# stp_t1_cor have no underscore after the "t" and are not matched.
columns_to_check <- grep("stp_t_", names(df), value = TRUE)
# Outer loop: one pass per selected column.
for (i in 1:length(columns_to_check)) {
col_name <- columns_to_check[i]
new_col_name <- paste0("stp_result_", i)
# Start from all-NA so rows that are never assigned stay missing.
result <- rep(NA, nrow(df))
# Inner loop: walk down the rows of this single column.
for (j in 1:nrow(df)) {
value <- df[[col_name]][j]
if (is.na(value)) {
result[j] <- NA
} else if (value %in% c(2, 4)) {
result[j] <- 0
} else if (value %in% c(1, 3, 5)) {
# `1:j - 1` parses as `(1:j) - 1`, i.e. 0:(j - 1); index 0 selects nothing,
# so this reads result[1..j-1]: the results of EARLIER ROWS in this column.
# The test asks "has any earlier row already been given a 1?", regardless of
# which value produced it, so at most one row per column can receive a 1.
# NOTE(review): the desired table looks like it tracks repeated values across
# the columns of one row instead - confirm the intended direction.
# The repeated `value %in% c(1, 3, 5)` is always TRUE inside this branch.
if (value %in% c(1, 3, 5) && !any(!is.na(result[1:j - 1]) & result[1:j - 1] == 1)) {
result[j] <- 1
} else {
result[j] <- 0
}
} else {
# Sentinel for any non-missing value other than 1 to 5.
result[j] <- 999
}
}
df[[new_col_name]] <- result
}
I think the entire loop just doesn't work. It results in a lot of 0's and incorrectly states others as 1. Not sure where I am going wrong.
Desired outcome would be the original df with the new columns:
subject_id stp_result_1 stp_result_2 stp_result_3 stp_result_4 stp_result_5 stp_result_6 stp_result_7 stp_result_8 stp_result_9 stp_result_10
5467 1 0 1 0 1 NA NA NA NA NA
6784 1 1 0 1 NA NA NA NA NA NA
3457 1 1 0 0 1 NA NA NA NA NA
0987 1 1 0 0 0 NA NA NA NA NA
1245 1 1 1 NA NA NA NA NA NA NA
1945 1 1 1 NA NA NA NA NA NA NA
3468 0 1 0 1 0 0 1 0 0 0
0012 1 0 0 0 0 0 1 1 0 0
0823 1 0 1 1 NA NA NA NA NA NA
0812 1 1 1 NA NA NA NA NA NA NA
Here is a function that checks and applies the conditions to one row of values, without looping over individual cells.
The code then `apply`'s the function to each row of the columns to check and returns a matrix that can be `cbind`'ed with the original data.frame.
# Score one vector of trial values.
#
# x: vector of trial values (may contain NA).
# Returns a numeric vector the same length as x:
#   NA  where x is NA,
#   0   where x is 2 or 4,
#   1   for the first occurrence of each of the values 1, 3 and 5,
#   0   for every later repeat of such a value,
#   999 for any other non-missing value.
fun <- function(x) {
  # everything starts as the "unexpected value" sentinel
  scored <- rep(999, length(x))
  # missing input stays missing
  scored[is.na(x)] <- NA
  # 2 and 4 always score 0
  scored[x %in% c(2, 4)] <- 0
  # among the 1/3/5 entries, duplicated() is FALSE exactly at the first
  # occurrence of each value, so its negation is the 1/0 score we need
  is_odd <- x %in% c(1, 3, 5)
  scored[is_odd] <- as.numeric(!duplicated(x[is_odd]))
  scored
}
# Select the trial-value columns by name.
columns_to_check <- names(df)[grepl("stp_t_", names(df))]
# Score every row with fun(); apply() returns one column per input row, so
# transpose to get back one row per row of df.
res <- t(apply(df[columns_to_check], 1, fun))
colnames(res) <- paste0("stp_result_", seq_along(columns_to_check))
# Print the result matrix, then the original data with the new columns.
res
cbind(df, res)
Created on 2024-05-03 with reprex v2.1.0
See if this works:
# Score each row with fun(), then transpose so rows of the result match df.
t(apply(df[columns_to_check], 1, fun))