Goodevening, I'm working on data with two types of blood pressure (non-invasive and arterial) with different duration for each person. It has 6 columns of "id" "begin_time" "end_time" "nibp_time" "nibp_value" "abp_time" "abp_value". I used difftime() to reduce the complexity. Time interval between "nibp_value" or "abp_value" is 5 minutes. So my data looks like below.
df <- data.frame(id = c(1,1,1,1,1,1,1,1,2,2,2,2,3,3),
nibp_time = c(0,1,0,1,0,1,0,1,0,1,2,3,NaN,NaN),
nibp_value = c(80,65,80,65,80,65,80,65,95,90,83,89,NaN,NaN),
abp_time = c(1,1,2,2,3,3,4,4,NaN,NaN,NaN,NaN,0,1),
abp_value = c(68,68,66,66,70,70,73,73,NaN,NaN,NaN,NaN,88,84))
The problem is, there are missing values and I would like to merge "nibp_value" and "abp_value" according to "nibp_time" and "abp_time". If the "nibp_time" equals "abp_time", "abp_value" should be stored ("abp_value" has priority) and looks like below.
df2<- data.frame(id = c(1,1,1,1,1,2,2,2,2,3,3),
bp_time = c(0,1,2,3,4,0,1,2,3,0,1),
bp_value = c(80,68,66,70,73,95,90,83,89,88,84))
So that I can apply
as.data.table(df2)[, dcast(.SD, id ~ bp_time, value.var = "bp_value")]
to make it to serial format.
I tried
df$bp_time <- ifelse(is.na(df$abp_time), df$nibp_time, df$abp_time)
In that way, the value at the time '0' of id '1' would be eliminated because abp_time has no '0' for id '1'. Would you help me to merge properly?
Update 16.12.:
library(data.table)
df <- data.table(id = c(1,1,1,1,1,1,1,1,2,2,2,2,3,3),
nibp_time = c(0,1,0,1,0,1,0,1,0,1,2,3,NaN,NaN),
nibp_value = c(80,65,80,65,80,65,80,65,95,90,83,89,NaN,NaN),
abp_time = c(1,1,2,2,3,3,4,4,NaN,NaN,NaN,NaN,0,1),
abp_value = c(68,68,66,66,70,70,73,73,NaN,NaN,NaN,NaN,88,84))
df[, `:=`(bp_time = ifelse(!is.na(nibp_time) & (nibp_time == 0 | is.na(abp_time)), nibp_time, abp_time),
bp_value = ifelse(!is.na(nibp_time) & (nibp_time == 0 | is.na(abp_time)), nibp_value, abp_value))]
df2 = unique(df[, c(1,6,7)], by=c("id", "bp_time"))
Output:
> df2
id bp_time bp_value
1: 1 0 80
2: 1 1 68
3: 1 2 66
4: 1 3 70
5: 1 4 73
6: 2 0 95
7: 2 1 90
8: 2 2 83
9: 2 3 89
10: 3 0 88
11: 3 1 84
Validation:
df_check<- data.table(id = c(1,1,1,1,1,2,2,2,2,3,3),
bp_time = c(0,1,2,3,4,0,1,2,3,0,1),
bp_value = c(80,68,66,70,73,95,90,83,89,88,84))
> df2 == df_check
id bp_time bp_value
[1,] TRUE TRUE TRUE
[2,] TRUE TRUE TRUE
[3,] TRUE TRUE TRUE
[4,] TRUE TRUE TRUE
[5,] TRUE TRUE TRUE
[6,] TRUE TRUE TRUE
[7,] TRUE TRUE TRUE
[8,] TRUE TRUE TRUE
[9,] TRUE TRUE TRUE
[10,] TRUE TRUE TRUE
[11,] TRUE TRUE TRUE