this is my starting df
test <- data.frame(ident = c(1,2,3,4,5),
cult1 = c("CER", "CER", "CER", "INF", "INF"),
cult2 = c("GEX", "GEX", "GEX", "GEX", "GEX"),
cult3 = c(NA, "ORG", "ORG", NA, NA),
prod = c(11050,2134,0,26250,2124),
value = c(19785,34132,9207,6987,4455),
weight = c(0,0,56,13,556))
I try to spread value, prod and weight for each cult, if its not NA, so far i ve done it but with NA named columns, also i need to duplicate columns value, prod weight for each spread as it get deleted afterwhile.
test <- test %>% mutate (prod2 = prod,
prod3 = prod,
value2 = value,
value3 = value,
weight2 = weight,
weight3 = weight)
test <- test %>% mutate(CATEGORIE_TEMP = paste0("P_", cult1)) %>%
spread(key = CATEGORIE_TEMP, value = "prod")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("V_", cult1)) %>%
spread(key = CATEGORIE_TEMP, value = "value")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("W_", cult1)) %>%
spread(key = CATEGORIE_TEMP, value = "weight")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("P_", cult2)) %>%
spread(key = CATEGORIE_TEMP, value = "prod2")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("V_", cult2)) %>%
spread(key = CATEGORIE_TEMP, value = "value2")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("W_", cult2)) %>%
spread(key = CATEGORIE_TEMP, value = "weight2")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("P_", cult3)) %>%
spread(key = CATEGORIE_TEMP, value = "prod3")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("V_", cult3)) %>%
spread(key = CATEGORIE_TEMP, value = "value3")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("W_", cult3)) %>%
spread(key = CATEGORIE_TEMP, value = "weight3")
#removing the NA named columns
test <- test %>% select(-ends_with("NA"))
It s there a more efficient way to do this ? I do not have pivot_wider function as i am running an old R version.
Expected result :
ident cult1 cult2 cult3 P_CER P_INF V_CER V_INF W_CER W_INF P_GEX V_GEX W_GEX P_ORG V_ORG W_ORG
1 1 CER GEX <NA> 11050 NA 19785 NA 0 NA 11050 19785 0 NA NA NA
2 2 CER GEX ORG 2134 NA 34132 NA 0 NA 2134 34132 0 2134 34132 0
3 3 CER GEX ORG 0 NA 9207 NA 56 NA 0 9207 56 0 9207 56
4 4 INF GEX <NA> NA 26250 NA 6987 NA 13 26250 6987 13 NA NA NA
5 5 INF GEX <NA> NA 2124 NA 4455 NA 556 2124 4455 556 NA NA NA
I'm not sure if you can call it more efficient, but at least it is one code block:
test2 <- test %>%
gather(cult1:cult3, key = "key", value = "cult") %>%
mutate(prod_cult = paste0("P_",cult),
value_cult = paste0("V_",cult),
weight_cult = paste0("W_",cult)) %>%
spread(key = key, value = cult) %>%
spread(key = prod_cult, value = prod) %>%
spread(key = value_cult, value = value) %>%
spread(key = weight_cult, value = weight) %>%
group_by(ident) %>%
summarise_all(~first(na.omit(.))) %>%
select(-ends_with("NA"))