This is my sample data set
sample <- structure(list(Week = c(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2),
Project = c("A", "A", "A", "A", "B", "B", "B", "B", "C", "C", "C", "A", "A", "A" ),
Status= c( "Active","Rescheduled","Active", "Cancelled", "Active", "Cancelled", "Cancelled",
"Rescheduled", "Active", "Active", "Rescheduled", "Cancelled", "Cancelled", "Active")),
.Names = c("Week","Project","Status"),
class = "data.frame" , row.names = c(NA, -14L))
> sample
Week Project Status
1 1 A Active
2 1 A Rescheduled
3 1 A Active
4 1 A Cancelled
5 1 B Active
6 1 B Cancelled
7 1 B Cancelled
8 1 B Rescheduled
9 2 C Active
10 2 C Active
11 2 C Rescheduled
12 2 A Cancelled
13 2 A Cancelled
14 2 A Active
and I want to create a data frame calculates each Program's actve rate and 1-active rate(=Non active rate) by Week, with 'NA' values
So the table would look like this
Week A_active_rate A_Non_active_rate B_active_rate B_Non_active_rate C_active_rate C_Non_active_rate
1 1 0.50 0.50 0.25 0.75 NA NA
2 2 0.33 0.67 NA NA 0.67 0.33
Another tidyverse approach:
library(dplyr)
library(tidyr)
sample %>%
summarize(
active_rate = mean(Status == "Active"),
Non_active_rate = 1 - active_rate,
.by = c(Week, Project)
) %>%
pivot_wider(
names_from = Project,
values_from = active_rate:Non_active_rate,
names_glue = "{Project}_{.value}",
names_vary = "slowest"
)
# # A tibble: 2 × 7
# Week A_active_rate A_Non_active_rate B_active_rate B_Non_active_rate C_active_rate C_Non_active_rate
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 0.5 0.5 0.25 0.75 NA NA
# 2 2 0.333 0.667 NA NA 0.667 0.333