Suppose you have the following dataframe:
df <- data.frame(year=c(rep(2010,12),rep(2011,12),rep(2012,12)),
country=c(rep("DEU",4),rep("ITA",4),rep("USA",4),
rep("DEU",4),rep("ITA",4),rep("USA",4),
rep("DEU",4),rep("ITA",4),rep("USA",4)),
industry=c(rep(1:4,9)),
stock1=c(rep(0,24),0,0,2,4,1,0,1,2,3,3,3,5),
stock2=c(rep(0,24),0,3,3,4,5,0,1,1,2,2,2,5))
and you want to get the following outcome:
df2 <- data.frame(year=c(rep(2010,12),rep(2011,12),rep(2012,12)),
country=c(rep("DEU",4),rep("ITA",4),rep("USA",4),
rep("DEU",4),rep("ITA",4),rep("USA",4),
rep("DEU",4),rep("ITA",4),rep("USA",4)),
industry=c(rep(1:4,9)),
stock1=c(rep(NA,24),0,0,2,4,1,0,1,2,3,3,3,5),
stock2=c(rep(NA,24),0,3,3,4,5,0,1,1,2,2,2,5))
The concept is that if, for a particular year, a specific country reports zeros in stock2 across ALL industries, then those zeros should be replaced with NAs (Not Available) in both stock1 and stock2. My attempt below
library(dplyr)
df2 = df %>%
group_by(country, year, industry) %>%
mutate(
stock1 = ifelse(all(stock2 == 0), NA, stock1),
stock2 = ifelse(all(stock2 == 0), NA, stock2)
)
Thanks!
You can try this approach:
df %>%
mutate(
ind = all(stock2==0),
across(stock1:stock2, ~if_else(ind, NA,.)),
.by = c(year, country)) %>% select(-ind)
To avoid the temporary ind
column (used to ensure if_else()
TRUE and FALSE have same length, you can use case_when()
as a shortcut:
mutate(
df,
across(stock1:stock2, ~case_when(all(stock2==0)==F~.)),
.by = c(year, country)
)
Borrowing @shaun_m excellent little trick, you can also use if_else()
by setting FALSE to 1, and multiplying:
mutate(
df,
mutate(across(stock1:stock2, ~if_else(all(stock2==0), NA,1)*.),
.by = c(year, country)
)
All give same output