Search code examples
rrbind

In R, rbind dataframes repeting only some elements


I have 4 dataframes (df, df1, df2 and df4) and I want to create df3.

df <- data.frame(
  Date = c("28/04/2010", "26/09/2010"),
  HolidayType = c("MUNICIPAL", "MUNICIPAL"),
  State = c("AC", "MG"),
  Municipality = c("Acrelândia", "Belo Horizonte")) 
df1 <- data.frame(
  Date = c("18/03/2010", "16/01/2010"),
  HolidayType = c("ESTADUAL", "ESTADUAL"),
  State = c("AC","MG"),
  Municipality = c("NA", "NA"))                                                                      
df2 <- data.frame(
  Date = c("10/05/2010", "22/11/2010"),
  HolidayType = c("NACIONAL", "NACIONAL"),
  State = c("NA", "NA"),
  Municipality = c("NA", "NA"))
df4 <- data.frame(
  Date = c("05/07/2010", "07/09/2010"),
  HolidayType = c("OPTIONAL", "OPTIONAL"),
  State = c("NA", "NA"),
  Municipality = c("NA", "NA"))

My desired df3 is like

df3 <- data.frame(Date = c("28/04/2010", "18/03/2010","10/05/2010", "22/11/2010", "05/07/2010", "07/09/2010", "26/09/2010","16/01/2010", "10/05/2010", "22/11/2010", "05/07/2010", "07/09/2010"),
                  HolidayType = c("MUNICIPAL","ESTADUAL", "NACIONAL", "NACIONAL", "OPTIONAL", "OPTIONAL", "MUNICIPAL","ESTADUAL","NACIONAL", "NACIONAL", "OPTIONAL", "OPTIONAL"),
                  State = c("AC", "AC", "MG", "MG"),
                  Municipality = c("Acrelândia", "Acrelândia", "Acrelândia", "Acrelândia", "Acrelândia", "Acrelândia","Belo Horizonte","Belo Horizonte", "Belo Horizonte","Belo Horizonte","Belo Horizonte","Belo Horizonte"))

I am using the following code that is almost doing what is needed but leaving NA in the "Municipality" column, except from the value from df. What to do in this case?

# initialize an empty dataframe to store the merged result
df3 <- data.frame(Date = character(),
                  HolidayType = character(),
                  State = character(),
                  Municipality = character(),
                  stringsAsFactors = FALSE)

# loop through unique Municipality values in df and merge matching rows from df1
for (m in unique(df$Municipality)) {
  df_sub <- df[df$Municipality == m, ]
  df1_sub <- df1[df1$State %in% df_sub$State, ]
  df2_sub <- df2
  df4_sub <- df4
  df3 <- rbind(df3, df_sub, df1_sub, df2_sub, df4_sub)
}

# sort by date and reset row names
df3 <- df3[order(as.Date(df3$Date, format = "%d/%m/%Y")), ]

Solution

  • You didn't make it very clear, but I think what you want is a dataframe of holidays and all places they're observed, based on whether they're municipal, state, national, or "optional" holidays, with optional holidays treated the same as national holidays. If that's right, I would use a series of joins for each holiday type with unique places, then bind together. Using dplyr:

    library(dplyr)  # >= v1.1.0 for `cross_join()`
    
    # create df of unique places
    places <- distinct(df, State, Municipality)
    
    # municipal holidays are already associated with the correct places
    municipal_hdys <- df
    # for state holidays, join by `State`
    state_hdys <- left_join(select(df1, Date, HolidayType, State), places)
    # for national and optional holidays, use a cross-join to associate with all places
    national_hdys <- cross_join(select(df2, Date, HolidayType), places)
    optional_hdys <- cross_join(select(df4, Date, HolidayType), places)
    
    # bind it all together and sort
    bind_rows(municipal_hdys, state_hdys, national_hdys, optional_hdys) %>% 
      arrange(as.Date(Date, format = "%d/%m/%Y"))
    
             Date HolidayType State   Municipality
    1  16/01/2010    ESTADUAL    MG Belo Horizonte
    2  18/03/2010    ESTADUAL    AC     Acrelândia
    3  28/04/2010   MUNICIPAL    AC     Acrelândia
    4  10/05/2010    NACIONAL    AC     Acrelândia
    5  10/05/2010    NACIONAL    MG Belo Horizonte
    6  05/07/2010    OPTIONAL    AC     Acrelândia
    7  05/07/2010    OPTIONAL    MG Belo Horizonte
    8  07/09/2010    OPTIONAL    AC     Acrelândia
    9  07/09/2010    OPTIONAL    MG Belo Horizonte
    10 26/09/2010   MUNICIPAL    MG Belo Horizonte
    11 22/11/2010    NACIONAL    AC     Acrelândia
    12 22/11/2010    NACIONAL    MG Belo Horizonte