Search code examples
rloopsautomationcasegsub

Grouping Semesters into Academic Years generalization


I have about a specific section of my code. The loop inputs semester files, computes new columns and outputs a data set with the new variables. The loop works beautifully, however making the Acad_Year variable is stagnant, I am looking for a way to make it more flexible so that I won't need to go in and re-write the case_when statement every time there is a new dataset. Sample data is available. Thank you in advance!

{r setup}

require("knitr")
 setwd("~/Downloads/Stack Overflow/")

library(dplyr)
library(tidyr)
library(writexl)

PhGrad <- rbind(PhGrad_08, PhGrad_SP_23) %>% 
  filter(!BannerID== "")

d <- tibble(
  filename = list.files(), 
  Sem = gsub(".*(Fall|Spring|Summer).*", "//1", filename), 
  Year = gsub(".*(//d{2}).*", "//1", filename), 
  grp = gsub(".*(ASPH|ID).*", "//1", filename)) %>% 
  pivot_wider(names_from = "grp", values_from="filename")

res <- vector(mode="list", length=nrow(d))
names(res) <- paste(d$Sem, d$Year, sep="_")

for(i in seq_along(res)){
  ASPH <- rio::import(d$ASPH[i])
  ID <- rio::import(d$ID[i])
  
res[[i]] <- bind_rows(ASPH, ID) %>%
    distinct(ID, Program, .keep_all = T) %>% 
    rowwise() %>% 
    mutate(racecount= sum(c_across(`Race-Am Ind`:`Race- Caucasian`)== "Y", na.rm=T)) %>% 
    ungroup() %>% 
    mutate(racecode= case_when(Citizenship %in% list("NN", "NV") ~ "foreign_national",
                               `Race- Hispanic`== "Y" ~ "hispanic_latino", 
                                racecount >1 ~ "two_or_more_races",
                               `Race-Am Ind`== "Y"  ~ "american_indian_alaskan_native",
                               `Race- Asian`== "Y"  ~ "asian",
                               `Race-Afr Amer`== "Y"  ~ "black_african_american",
                               `Race- Hawaiian` == "Y"  ~ "native_hawaiian_pacific_islander",
                               `Race- Caucasian`== "Y" ~ "white",
                               `Race-Not Rept`== "Y" ~ "race_unknown",
                               TRUE~ "race_unknown"),
           gender_long= case_when(Gender== "F"~ "Female",
                                  Gender== "M"~ "Male",
                                  Gender== "N"~ "Other",
                                  TRUE~ "other"),
           DEPT= case_when(Program %in% list("3GPH363AMS", "3GPH363AMSP", "3GPH378AMCD", "3GPH378AMS", "3GPH379APHD")~ "COMD",
                           Program %in% list("3GPH593AMPH", "3GPH593AMS", "3GPH593APHD", "3GPH569ACGS")~ "ENHS",
                           Program %in% list("3GPH596AMS", "3GPH596AMSPH", "3GPH596APHD","3GPH594AMPH", "3GPH594AMS", "3GPH594AMSPH", "3GPH594APHD", "3GPH586APBAC")~ "EPID/BIOS", 
                           Program %in% list("3GPH331AMS","3GPH331APHD","3GPH334AMS","3GPH335ADPT", "3GPH377AMS", "3GPH388AMS", "3GPH588AMPH", "3GPHJ331MS", "3UPH331ABS")~ "EXSC",
                           Program %in% list("3GPH568APBAC","3GPH592ACGS","3GPH592AMPH", "3GPH592APHD", "3GPH576ACGS", "3GPH121ACGS", "3GID635ACGS")~ "HPEB",
                           Program %in% list("3GPH591AMPH", "3GPH591APHD", "3GPH597AMHA","3GPH591ADPH")~ "HSPM",
                           TRUE~ "Missing"), 
           degree_delivery_type= case_when(`First Concentration`== "R999" | `Second Concentration`== "R999" ~ "Distance-based",
                                           `First Concentration`== "3853" | `Second Concentration`== "3853" ~ "Executive", 
                                           TRUE~ "Campus-based"),
 # FTE_compute= case_when(Level== "GR" & `Course Hours`<9 ~ round(`Course Hours`/9, #digits=2),
 #                                  Level== "GR" & `Course Hours`>=9~ 1,
 #                                 Level== "UG" & `Course Hours`<12~ round(`Course Hours`/12, 
 #digits=2),
 #                                  Level== "UG" & `Course Hours`>=12 ~ 1),
 #          Full_Part_Status=case_when((Level== "GR" & `Course Hours` <9)| (Level== "UG" & 
 #`Course Hours`<12)~"parttime_status",
 #                                      (Level=="GR" & `Course Hours`>=9)|(Level== "UG" & `Course 
 #Hours`>=12)~"fulltime_status",
 #                                       TRUE~ "other"),
           Sem_Year= paste0(d$Sem[i],"_",d$Year[i]),
           StudentCount= 1,
      Acad_Year= case_when(Sem_Year %in% list("Fall_18", "Spring_19", "Summer_19")~ "AY2018-19",
                                 Sem_Year %in% list("Fall_19", "Spring_20", "Summer_20")~ "AY2019-20",
                                 Sem_Year %in% list("Fall_20", "Spring_21", "Summer_21")~ "AY2020-21",
                                 Sem_Year %in% list("Fall_21", "Spring_22", "Summer_22")~ "AY2021-22",
                                 Sem_Year %in% list("Fall_22", "Spring_23")~ "AY2022-23"),
      Deg_group = case_when(Degree %in% list("DPT", "PHD", "DPH")~ "Doctorate",
                            Degree %in% list("MSP", "MCD", "MPH", "MHA", "MS","MSPH")~ "Masters",
                            Degree %in% list("CGS", "PBACC")~ "Certificate")) %>% 
    left_join(., PhGrad %>% mutate_at(vars(BannerID), ~as.character(.)), by= c("ID"="BannerID", "DEPT"), unmatched= "drop", relationship= "many-to-many") %>% 
  mutate(New_Deg= case_when(is.na(Degree.y)== T~ Degree.x,
                     is.na(Degree.y)== F~ Degree.y,
                          TRUE~ "Error")) %>% 
  select(-c(ApplicationID:StudentStatus))
}

Solution

  • library(dplyr)
    data.frame(Sem_Year = c("Fall_21", "Spring_22", "Summer_22",
                            "Fall_31", "Spring_32", "Summer_32")) %>%
      
      tidyr::separate(Sem_Year, c("Sem","Yr"), convert = TRUE, remove = FALSE) %>%
      mutate(AY_end = Yr + if_else(Sem == "Fall", 1, 0),
             Acad_Year = paste0("AY20", AY_end - 1, "-", AY_end)) %>%
      select(-c(Sem, Yr, AY_end))
    

    Result (Reminder: update in 2099)

       Sem_Year Acad_Year
    1   Fall_21 AY2021-22
    2 Spring_22 AY2021-22
    3 Summer_22 AY2021-22
    4   Fall_31 AY2031-32
    5 Spring_32 AY2031-32
    6 Summer_32 AY2031-32