Search code examples
rexcelloopsnestedsapply

Problems with nested sapply for batch extracting excel sheets and combining in one dataset


I have a specific set of sheets (see name_sheet) from a set of excel files (file.names) that I need to extract in order to generate a database by sheet.

I am having problems with the sapply function as I need to repeat the code in section C for each sheet. I am getting the following error message:

Error in FUN(X[[i]], ...) : 
  Cannot find the sheet you requested in the file!

Here is the code I am using:

# A: Create data set of sheet names and indexes
name_sheet <- rbind.data.frame(c("EXTRAC.N°01", 1), 
                              c("PREC.FRES 12", 13), 
                              c("PREC.SALPR 13", 14),
                              c("PREC.SECO 14", 15), 
                              c("CUADRO Nº 17 Y HABLADO 02", 18),
                              c("DESEMB.N°05 HABLADO-01-03-05", 20),
                              c("CUADRO Nº 15 (2)", 21))

colnames(name_sheet) <- c("name","index")

# B: Read all excel files into a list
file.names <- list.files(here("data"), pattern = "ANUAL", full.names = TRUE, recursive = TRUE)

# C: Read each of the selected sheets by file and append them by sheet

for (i in nrow(name_sheet)) {
df.list[i] <- lapply(file.names, 
                  read.xlsx, 
                  sheetIndex = name_sheet$index[i],
                  header = TRUE)

# Combine in one dataset
  df_[i] <- smartbind(list = df.list[i], fill = " ")

# Write to disk
  write.xlsx(df_[i], here("Data", "Consumo_Humano.xlsx"), 
           sheetName = name_sheet$name[i], 
           row.names = FALSE)
}

Solution

  • First there were several errors in the code:

    1. @X Æ A-12 was right I was not using openxlsx, and therefore now I have used its functions (loadWorkbook, addWorksheet, etc.).
    2. The loop did not have the correct sequence: nrow(name_sheet)

    Secondly, as consequence of using openxlsx, you need to create a workbook first and then start to create sheets. Hence the final code is:

        #Generate list of sheets and positions
        name_sheet <- rbind.data.frame(c("EXTRAC.N°01", 1), 
                                      c("PREC.FRES 12", 13), 
                                      c("PREC.SALPR 13", 14),
                                      c("PREC.SECO 14", 15), 
                                      c("CUADRO Nº 17", 18),
                                      c("DESEMB.N°05 (2)", 20),
                                      c("CUADRO Nº 15 (2)", 21))
        
        colnames(name_sheet) <- c("name","index")
        
        # Read all excel files into a list
        file.names <- list.files(here("data"), 
                                 pattern = "ANUAL", 
                                 full.names = TRUE, 
                                 recursive = TRUE)
        
        #Read each of the selected sheets by file and joint them
        
        for (i in 1:nrow(name_sheet)) {
        df.list <- lapply(file.names, 
                          read.xlsx, 
                          sheet = name_sheet$name[i],
                          colNames = TRUE,
                          skipEmptyRows = TRUE,
                          check.names = TRUE,
                          fillMergedCells = FALSE,
                          )
        
        # Combine in one dataset
        
        df <- smartbind(list = df.list, fill = "")
        
        # Load workbook and write to disk
        
        wb <- loadWorkbook(file = here("Data", "Consumo_Humano.xlsx"))
        
        sheet <- name_sheet$name[i]
        
        # Add sheets
        
        addWorksheet(wb,
          sheetName = sheet,
          header = c("ODD HEAD LEFT", "ODD HEAD CENTER", "ODD HEAD RIGHT"),
          footer = c("ODD FOOT RIGHT", "ODD FOOT CENTER", "ODD FOOT RIGHT")
        )
        
        writeData(wb, sheet = sheet, x = df, colNames = T, rowNames = F)
        
        saveWorkbook(wb,
          file = here("Data", "Consumo_Humano.xlsx"),
          overwrite = TRUE
        )
        }