# Sample data in long format: 5 students, each observed at 4 time points.
# v1-v3 are fully observed; v4-v8 contain missing values (NA).
data <- data.frame(
  student = rep(c(1, 2, 3, 4, 5), each = 4),
  time = rep(c(1, 2, 3, 4), times = 5),
  v1 = c(16, 12, 14, 12, 17, 16, 12, 12, 13, 12,
         16, 16, 10, 10, 14, 17, 17, 12, 10, 11),
  v2 = c(1, 1, 3, 2, 2, 2, 3, 1, 2, 1, 2, 1, 3, 1, 1, 2, 3, 3, 1, 2),
  v3 = c(4, 1, 4, 4, 2, 2, 2, 2, 1, 3, 2, 3, 1, 2, 2, 1, 4, 1, 1, 4),
  v4 = c(NA, 27, NA, 42, 40, 48, 45, 25, 29, NA,
         NA, 27, NA, NA, NA, NA, NA, NA, 44, 39),
  v5 = c(NA, 1, NA, NA, 1, 3, 3, 2, NA, NA,
         NA, 1, NA, NA, NA, NA, 3, 2, 4, 1),
  v6 = c(NA, 0, 1, NA, 1, NA, 1, NA, 0, NA,
         1, 1, NA, NA, NA, NA, 0, 0, NA, 0),
  v7 = c(0, 1, 1, NA, 0, 1, 1, 0, 1, 0,
         NA, 0, NA, NA, NA, NA, 0, 1, NA, 1),
  v8 = c(1, NA, 0, 1, 0, 0, NA, 1, 1, NA,
         0, 0, NA, NA, NA, NA, 1, 0, NA, 1)
)
This is my sample data and with it I am seeking to:
A. For time = 1 use v1-v3 to impute v4-v8 using MICE (v4 is continuous, v5 is categorical, v6-v8 is binary)
B. After the missing values at time = 1 have been imputed, I want to fill any remaining NA values with the previous value for the same student. For example, if a variable at times 1-4 is NA, NA, 0, 1 and the imputed value at time 1 is 1, the result would be 1, 1, 0, 1.
I attempted:
# Impute using only the baseline rows (time == 1).
# The row filter must go BEFORE the comma: `data[, data$time == 1]` uses the
# 20-element logical vector as a column selector and fails with
# "undefined columns selected".
# `method` is spelled out in full rather than relying on partial matching.
dataNEW <- mice(data[data$time == 1, ], m = 5, maxit = 50, method = "pmm",
                seed = 500)
A. For time = 1 use v1-v3 to impute v4-v8 using MICE (v4 is continuous, v5 is categorical, v6-v8 is binary)
First, variables v5 - v8 have to be converted to factors:
# Store the categorical (v5) and binary (v6-v8) columns as factors instead of
# plain numerics. All four columns are converted in a single step.
data[paste0("v", 5:8)] <- lapply(data[paste0("v", 5:8)], factor)
Create a predictor matrix to tell mice to use only v1-v3 to predict v4-v8:
# Square matrix with one row and one column per variable in `data`.
# Start from "everything predicts everything else": all ones, zero diagonal.
Pred_Matrix <- matrix(1, nrow = ncol(data), ncol = ncol(data))
diag(Pred_Matrix) <- 0
# Zero out columns 1-2 (student, time) and 6-10 (v4-v8) so they are never used
# as predictors; only columns 3-5 (v1-v3) stay switched on.
Pred_Matrix[, c(1:2, 6:10)] <- 0
Impute using only 1 imputation (the default is 5) because all you want are the imputed values; you're not doing anything else such as pooling the results for modelling.
# Impute the time == 1 rows with a single imputation (m = 1).
# - `predictorMatrix` is written out in full instead of relying on partial
#   matching of the abbreviated `pred`.
# - A fixed seed makes the imputed values reproducible between runs.
# - `mice::mice` is namespace-qualified so the call works even when the
#   package has not been attached with library(mice).
impA <- mice::mice(
  subset(data, subset = time == 1),
  predictorMatrix = Pred_Matrix,
  m = 1,
  seed = 500
)
The imputed data can be extracted using the complete() function (from the mice package, not the one in tidyr).
B. After the missing values at time = 1 have been imputed, I want to fill any remaining NA values with the previous value for the same student. For example, if a variable at times 1-4 is NA, NA, 0, 1 and the imputed value at time 1 is 1, the result would be 1, 1, 0, 1.
library(dplyr)
library(tidyr) # Needed for the fill function
# Stack the completed (imputed) time == 1 rows on top of the untouched later
# rows, restore student/time order, then carry the last observed value forward
# within each student for v4-v8.
rbind(mice::complete(impA), subset(data, subset = time != 1)) %>%
  arrange(student, time) %>%
  group_by(student) %>%
  fill(v4:v8, .direction = "down")
# A tibble: 20 x 10
# Groups: student [5]
student time v1 v2 v3 v4 v5 v6 v7 v8
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <fct> <fct> <fct>
1 1 1 16 1 4 40 2 1 0 1
2 1 2 12 1 1 27 1 0 1 1
3 1 3 14 3 4 27 1 1 1 0
4 1 4 12 2 4 42 1 1 1 1
5 2 1 17 2 2 40 1 1 0 0
6 2 2 16 2 2 48 3 1 1 0
7 2 3 12 3 2 45 3 1 1 0
8 2 4 12 1 2 25 2 1 0 1
9 3 1 13 2 1 29 1 0 1 1
10 3 2 12 1 3 29 1 0 0 1
11 3 3 16 2 2 29 1 1 0 0
12 3 4 16 1 3 27 1 1 0 0
13 4 1 10 3 1 40 1 0 0 0
14 4 2 10 1 2 40 1 0 0 0
15 4 3 14 1 2 40 1 0 0 0
16 4 4 17 2 1 40 1 0 0 0
17 5 1 17 3 4 40 3 0 0 1
18 5 2 12 3 1 40 2 0 1 0
19 5 3 10 1 1 44 4 0 1 0
20 5 4 11 2 4 39 1 0 1 1
Data
Note, I had to change the first value of v5 to 2, otherwise the polyreg imputation fails (with the original data, only two categories of v5 are observed at time = 1).
# Sample data in long format: 5 students, each observed at 4 time points.
# Same values as the question's data, except that the first value of v5 is 2
# rather than NA.
data <- data.frame(
  student = rep(c(1, 2, 3, 4, 5), each = 4),
  time = rep(c(1, 2, 3, 4), times = 5),
  v1 = c(16, 12, 14, 12, 17, 16, 12, 12, 13, 12,
         16, 16, 10, 10, 14, 17, 17, 12, 10, 11),
  v2 = c(1, 1, 3, 2, 2, 2, 3, 1, 2, 1, 2, 1, 3, 1, 1, 2, 3, 3, 1, 2),
  v3 = c(4, 1, 4, 4, 2, 2, 2, 2, 1, 3, 2, 3, 1, 2, 2, 1, 4, 1, 1, 4),
  v4 = c(NA, 27, NA, 42, 40, 48, 45, 25, 29, NA,
         NA, 27, NA, NA, NA, NA, NA, NA, 44, 39),
  v5 = c(2, 1, NA, NA, 1, 3, 3, 2, NA, NA,
         NA, 1, NA, NA, NA, NA, 3, 2, 4, 1),
  v6 = c(NA, 0, 1, NA, 1, NA, 1, NA, 0, NA,
         1, 1, NA, NA, NA, NA, 0, 0, NA, 0),
  v7 = c(0, 1, 1, NA, 0, 1, 1, 0, 1, 0,
         NA, 0, NA, NA, NA, NA, 0, 1, NA, 1),
  v8 = c(1, NA, 0, 1, 0, 0, NA, 1, 1, NA,
         0, 0, NA, NA, NA, NA, 1, 0, NA, 1)
)