df <- data.frame(PatientID = c("0002" ,"0002", "0005", "0005" ,"0009" ,"0009" ,"0018", "0018" ,"0039" ,"0039" , "0043" ,"0043", "0046", "0046" ,"0048" ,"0048"),
Timepoint= c("A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B"),
A = c(NA , 977.146 , NA , 964.315 ,NA , 952.311 , NA , 950.797 , 947.465 , 902.852 , 985.124 ,NA , 930.141 ,1007.790 , 1027.110 , 999.414),
B = c(998.988 , NA , 998.680 , NA , 1020.560 , 955.540 , 911.606 , 964.039 , 988.087 , 902.367 , 959.338 ,1029.050 , 987.374 ,1066.400 ,957.512 , 917.597),
C = c( 987.140 , 961.810 , 929.466 , 978.166, 969.469 , 943.398 ,936.034, 965.292 , 996.404 , 920.610 , 967.047, 913.517 , 893.428 , 921.606 , 929.590 ,950.493),
D = c( 961.810 , 929.466 , 978.166, 1005.820 , 925.752 , 969.469 ,943.398 , 965.292 , 996.404 , 967.047 , NA , 893.428 , 921.606 , 976.192 , 929.590 , 950.493),
E = c(1006.330, 1028.070 , 954.274 ,1005.910 ,949.969 , 992.820 ,934.407 , 948.913 , 961.375 ,955.296 , 961.128 ,998.119 ,1009.110 , 994.891 ,1000.170 ,982.763),
G= c(NA , 958.990 , 924.680 , 955.927 , NA , 949.384 ,973.348 , 984.392 , 943.894 , 961.468 , 995.368 , 994.997 , 979.454 , 952.605 ,NA , 956.507), stringsAsFactors = F)
Based on this dataframe, I need to create an extra FACTOR level for the variable (df$TimePoint
) that will be filled based on the following conditions - we have already factors A
and B
in that variable so lets say that we want to create factor level X
:
For df$A
. If df$Timepoint
B is >999 then the factor X will be filled with the same value as df$Timepoint level B value, otherwise (if it is ≤999) then it will be filled with the value at df$timepoint A.
For df$B
. If df$Timepoint
B is >986, factor X will be == as df$Timepoint level B value, otherwise X will == df$timepoint A.
For df$C
. If df$Timepoint
B is >1000, factor X will be == as df$Timepoint level B value, otherwise X will == df$timepoint A.
For df$D
. If df$Timepoint
B is >1030, factor X will be == as df$Timepoint level B value, otherwise X will == df$timepoint A.
For df$E
. If df$Timepoint
B is >800, factor X will be == as df$Timepoint level B value, otherwise X will == df$timepoint A.
For df$G
. If df$Timepoint
B is >950, factor X will be == as df$Timepoint level B value, otherwise X will == df$timepoint A.
The new dataframe would look like this:
Thanks in advance! Best
Here is one tidyverse approach.
library(tidyverse)
df %>%
pivot_wider(names_from = Timepoint,
values_from = A:G
) %>%
mutate(A_X = ifelse(A_B > 999, A_B, A_A),
B_X = ifelse(B_B > 986, B_B, B_A),
C_X = ifelse(C_B > 1000, C_B, C_A),
D_X = ifelse(D_B > 1030, D_B, D_A),
E_X = ifelse(E_B > 800, E_B, E_A),
G_X = ifelse(G_B > 950, G_B, G_A)) %>%
pivot_longer(cols = A_A:G_X,
names_sep = "_",
names_to = c("cat","Timepoint")) %>%
pivot_wider(names_from = cat,
values_from = value)
#> # A tibble: 24 x 8
#> PatientID Timepoint A B C D E G
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0002 A NA 999. 987. 962. 1006. NA
#> 2 0002 B 977. NA 962. 929. 1028. 959.
#> 3 0002 X NA NA 987. 962. 1028. 959.
#> 4 0005 A NA 999. 929. 978. 954. 925.
#> 5 0005 B 964. NA 978. 1006. 1006. 956.
#> 6 0005 X NA NA 929. 978. 1006. 956.
#> 7 0009 A NA 1021. 969. 926. 950. NA
#> 8 0009 B 952. 956. 943. 969. 993. 949.
#> 9 0009 X NA 1021. 969. 926. 993. NA
#> 10 0018 A NA 912. 936. 943. 934. 973.
#> 11 0018 B 951. 964. 965. 965. 949. 984.
#> 12 0018 X NA 912. 936. 943. 949. 984.
#> 13 0039 A 947. 988. 996. 996. 961. 944.
#> 14 0039 B 903. 902. 921. 967. 955. 961.
#> 15 0039 X 947. 988. 996. 996. 955. 961.
#> 16 0043 A 985. 959. 967. NA 961. 995.
#> 17 0043 B NA 1029. 914. 893. 998. 995.
#> 18 0043 X NA 1029. 967. NA 998. 995.
#> 19 0046 A 930. 987. 893. 922. 1009. 979.
#> 20 0046 B 1008. 1066. 922. 976. 995. 953.
#> 21 0046 X 1008. 1066. 893. 922. 995. 953.
#> 22 0048 A 1027. 958. 930. 930. 1000. NA
#> 23 0048 B 999. 918. 950. 950. 983. 957.
#> 24 0048 X 999. 958. 930. 930. 983. 957.
Created on 2021-07-29 by the reprex package (v0.3.0)
And here is an alternative using group_modify
and add_row
:
library(tidyverse)
df %>%
group_by(PatientID) %>%
group_modify(.f =
~ {
df_b <- .x[.x$Timepoint == "B",]
df_a <- .x[.x$Timepoint == "A",]
.x %>% add_row(Timepoint = "X",
A = ifelse(df_b$A > 999, df_b$A, df_a$A),
B = ifelse(df_b$B > 986, df_b$B, df_a$B),
C = ifelse(df_b$C > 1000, df_b$C, df_a$C),
D = ifelse(df_b$D > 1030, df_b$D, df_a$D),
E = ifelse(df_b$E > 800, df_b$E, df_a$E),
G = ifelse(df_b$G > 950, df_b$G, df_a$G)
)
})
#> # A tibble: 24 x 8
#> # Groups: PatientID [8]
#> PatientID Timepoint A B C D E G
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0002 A NA 999. 987. 962. 1006. NA
#> 2 0002 B 977. NA 962. 929. 1028. 959.
#> 3 0002 X NA NA 987. 962. 1028. 959.
#> 4 0005 A NA 999. 929. 978. 954. 925.
#> 5 0005 B 964. NA 978. 1006. 1006. 956.
#> 6 0005 X NA NA 929. 978. 1006. 956.
#> 7 0009 A NA 1021. 969. 926. 950. NA
#> 8 0009 B 952. 956. 943. 969. 993. 949.
#> 9 0009 X NA 1021. 969. 926. 993. NA
#> 10 0018 A NA 912. 936. 943. 934. 973.
#> # ... with 14 more rows
Created on 2021-07-29 by the reprex package (v0.3.0)