Attached data:
# Sample data: one subject (id 2) recorded at visits 13-17 and 19-23.
# Visits 18 and 24 are absent. vm (visit) and GE are stored as character
# strings, exactly as in the original data.
id <- c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2)
vm <- c("13", "14", "15", "16", "17", "19", "20", "21", "22", "23")
# A comma was missing between the 4th and 5th values, which made this line
# a syntax error.
GE <- c("0", "0", "0", "0", "0", "0", "1", "0", "1", "0")
fichier <- data.frame(id, vm, GE)
Attached R code:
library(tidyverse)
# Label every row: "infection" when GE is 1 at one of the recorded visits
# (13-17 or 19-23), "noinfection" otherwise.
# Columns are referenced by bare name so that case_when() is evaluated on the
# data frame flowing through the pipe rather than on the global `fichier`
# object, and the ten identical per-visit branches are folded into one
# `%in%` test.
fichier <- fichier %>%
  mutate(
    statut = case_when(
      vm %in% c(13:17, 19:23) & GE == 1 ~ "infection",
      TRUE ~ "noinfection"
    )
  )
Attached Results:
# Result obtained after adding `statut`: only the visits with GE == "1"
# (vm 20 and 22) are labelled "infection".
id <- c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2)
vm <- c("13", "14", "15", "16", "17", "19", "20", "21", "22", "23")
GE <- c("0", "0", "0", "0", "0", "0", "1", "0", "1", "0")
statut <- c(
  "noinfection", "noinfection", "noinfection", "noinfection", "noinfection",
  "noinfection", "infection", "noinfection", "infection", "noinfection"
)
# The column is named `statut`, matching the vector defined just above.
fichier <- data.frame(id, vm, GE, statut)
I would like to be able to fill in the "status" variable (named `statut` in the code above) for the two missing visits, vm = 18 and vm = 24.
a) I would like to give this variable at vm = 18
- the value 0 if GE = 0 during the previous visits of 13 to 17.
- the value 1 if GE = 1 during the previous visits of 13 to 17.
b) I would like to give this variable at vm = 24
- the value 0 if GE = 0 during the previous visits of 19 to 23.
- the value 1 if GE = 1 during the previous visits of 19 to 23.
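I see two potential scenarios in your description:
1. A missing visit gets the value 0 if all previous visits in its window are 0, and the value 1 if any previous visit is 1.
2. A missing visit gets the value 0 if all previous visits in its window are 0, and the value 1 only if all previous visits are 1; when the previous visits disagree, it stays missing and is labelled "mixed".
Either way, I have added solutions for both scenarios.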
Load required packages and your data:
library(dplyr)
library(tidyr)
# Example data from the question: a single subject (id 2) with ten recorded
# visits (13-17 and 19-23). vm and GE are kept as character vectors.
id <- rep(2, times = 10)
vm <- as.character(c(13:17, 19:23))
GE <- c(rep("0", times = 6), "1", "0", "1", "0")
fichier <- data.frame(id = id, vm = vm, GE = GE)
Scenario 1:
# Scenario 1: a visit that is absent from the data receives GE = 0 when every
# recorded visit in its window is 0, and GE = 1 when at least one recorded
# visit in its window is 1.
fichier <- fichier |>
  mutate(across(c(vm, GE), as.integer)) |>
  group_by(id) |> # the data may contain several id values
  complete(vm = 13:24) |> # add rows for the absent visits; their GE is NA
  fill(id, .direction = "down") |>
  # visits up to and including 18 form window 1, later visits form window 2
  mutate(window = if_else(vm <= 18, 1, 2)) |>
  group_by(id, window) |>
  mutate(
    # number of recorded infections in this subject's window
    n_pos = sum(GE, na.rm = TRUE),
    GE = case_when(
      is.na(GE) & n_pos > 0 ~ 1,
      is.na(GE) & n_pos == 0 ~ 0,
      .default = GE
    ),
    status = if_else(GE == 1, "infection", "noinfection")
  ) |>
  ungroup() |>
  select(-window, -n_pos)

fichier
# # A tibble: 12 × 4
# id vm GE status
# <dbl> <int> <dbl> <chr>
# 1 2 13 0 noinfection
# 2 2 14 0 noinfection
# 3 2 15 0 noinfection
# 4 2 16 0 noinfection
# 5 2 17 0 noinfection
# 6 2 18 0 noinfection
# 7 2 19 0 noinfection
# 8 2 20 1 infection
# 9 2 21 0 noinfection
# 10 2 22 1 infection
# 11 2 23 0 noinfection
# 12 2 24 1 infection
Scenario 2:
# Scenario 2: a visit that is absent from the data receives GE = 0 when every
# recorded visit in its window is 0, and GE = 1 when every recorded visit in
# its window is 1. When the recorded visits disagree, GE stays NA and the
# status is "mixed".
# NOTE: run this on the original `fichier`; re-create it from the sample data
# first if the Scenario 1 code has already overwritten it.
fichier <- fichier |>
  mutate(across(c(vm, GE), as.integer)) |>
  group_by(id) |> # assuming your data may have multiple id values
  complete(vm = 13:24) |> # add rows for the absent visits; their GE is NA
  fill(id, .direction = "down") |>
  # visits up to and including 18 form window 1, later visits form window 2
  mutate(tmp = if_else(vm <= 18, 1, 2)) |>
  group_by(id, tmp) |>
  mutate(
    # 1 when all recorded GE values in the window are identical, else 0
    tmp1 = +(n_distinct(GE, na.rm = TRUE) == 1),
    # na_rm = TRUE: take the first *recorded* value. Without it, a window
    # whose first visit is itself missing yields NA here and the gap is never
    # filled even though all recorded values agree.
    GE = case_when(
      is.na(GE) & tmp1 == 1 & first(GE, na_rm = TRUE) == 0 ~ 0,
      is.na(GE) & tmp1 == 1 & first(GE, na_rm = TRUE) == 1 ~ 1,
      .default = GE
    ),
    status = case_when(
      GE == 0 ~ "noinfection",
      GE == 1 ~ "infection",
      .default = "mixed"
    )
  ) |>
  ungroup() |>
  select(-starts_with("tmp"))

fichier
# # A tibble: 12 × 4
# id vm GE status
# <dbl> <int> <dbl> <chr>
# 1 2 13 0 noinfection
# 2 2 14 0 noinfection
# 3 2 15 0 noinfection
# 4 2 16 0 noinfection
# 5 2 17 0 noinfection
# 6 2 18 0 noinfection
# 7 2 19 0 noinfection
# 8 2 20 1 infection
# 9 2 21 0 noinfection
# 10 2 22 1 infection
# 11 2 23 0 noinfection
# 12 2 24 NA mixed