# Example data in long format: one row per (person, time) observation.
# `score` uses NA for missing values; `count` holds the desired result,
# i.e. the number of non-missing `score` values for that person.
data <- data.frame(
  person = rep(c(1, 2, 3), times = c(3, 4, 5)),
  time = c(1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5),
  score = c(NA, 100, 97, 63, NA, NA, 51, NA, NA, 73, NA, NA),
  count = rep(c(2, 2, 1), times = c(3, 4, 5))
)
I want to generate a 'count' column that simply equals the number of non-missing 'score' values for each person in my data frame.
This is my attempt, but it fails:
# Per-row count of non-missing `score` values within each person.
# Corrected version of the original aggregate() attempt, which failed because
# it selected a `count_try` column that did not exist yet, referred to
# `person` without `data$`, and aggregate() returns one row per group rather
# than one value per original row. ave() applies FUN within each group and
# returns a vector aligned with the rows of `data`, so the result can be
# assigned as a column directly.
data$count_try <- ave(
  data$score,
  data$person,
  FUN = function(x) sum(!is.na(x))
)
Once I have created this 'count' variable, I then hope to REMOVE persons with 3 or more NA values. Thanks.
# Input data in long format: one row per (person, time) observation,
# with NA marking missing `score` values.
df1 <- data.frame(
  person = rep(c(1, 2, 3), times = c(3, 4, 5)),
  time = c(1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5),
  score = c(NA, 100, 97, 63, NA, NA, 51, NA, NA, 73, NA, NA)
)
library(dplyr)
# For each person, count the non-missing scores, then keep only persons
# with fewer than 3 missing scores (n() is the group's row count, so
# n() - count is the number of NA scores in that group).
df1 %>%
  group_by(person) %>%
  mutate(count = sum(!is.na(score))) %>%
  filter(n() - count < 3)
#> # A tibble: 7 x 4
#> # Groups: person [2]
#> person time score count
#> <dbl> <dbl> <dbl> <int>
#> 1 1 1 NA 2
#> 2 1 2 100 2
#> 3 1 3 97 2
#> 4 2 1 63 2
#> 5 2 2 NA 2
#> 6 2 3 NA 2
#> 7 2 4 51 2
Created on 2019-06-17 by the reprex package (v0.3.0)