Search code examples
rreadxlwritexl

excel sheets with same prefix to a new excel file in R


I am trying to split excel file with multiple sheets to a new file based on prefix in sheet name.

I would like to output all sheets with prefix a_ to a.xslx , b_ to b.xslx and so on

Here is the code so far:

library(readxl)
library(readr)
library(writexl)

#get sheet names
sheets <- excel_sheets("/path/to/excel/file")
filenames <- paste0(sheets, ".xslx")

#sheet prefix
m <- regexpr("^.*_", filenames) 
file_prefix <- unique(regmatches(filenames, m))
prefnames <- paste0(file_prefix, ".xslx")

#read_excel - sheets with same prefix 
datspref <- lapply(file_prefix, read_excel, path = "/path/to/excel/file")

#save sheets with same prefix to a new excel file
lapply(seq_along(dats), function(i) write_xlsx(dats[[i]], prefnames[i]))

Appreciate any help!


Solution

  • Setup: I made sample.xlsx with the following sheets (and the same 2x2 table on each sheet):

    enter image description here

    xlsxfile <- "sample.xlsx"
    sheetnames <- readxl::excel_sheets(xlsxfile)
    sheetnames
    # [1] "a_1" "a_2" "a_3" "b_1" "b_2" "c_1" "c_3"
    newxlsxfiles <- sub("_.*", "", sheetnames)
    newxlsxfiles
    # [1] "a" "a" "a" "b" "b" "c" "c"
    dats <- lapply(setNames(nm = sheetnames), readxl::read_xlsx, path = xlsxfile)
    str(dats)
    # List of 7
    #  $ a_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
    #   ..$ a: num 1
    #   ..$ b: num 2
    #  $ a_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
    #   ..$ a: num 1
    #   ..$ b: num 2
    #  $ a_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
    #   ..$ a: num 1
    #   ..$ b: num 2
    #  $ b_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
    #   ..$ a: num 1
    #   ..$ b: num 2
    #  $ b_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
    #   ..$ a: num 1
    #   ..$ b: num 2
    #  $ c_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
    #   ..$ a: num 1
    #   ..$ b: num 2
    #  $ c_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
    #   ..$ a: num 1
    #   ..$ b: num 2
    datspl <- split(dats, newxlsxfiles)
    str(datspl)
    # List of 3
    #  $ a:List of 3
    #   ..$ a_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
    #   .. ..$ a: num 1
    #   .. ..$ b: num 2
    #   ..$ a_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
    #   .. ..$ a: num 1
    #   .. ..$ b: num 2
    #   ..$ a_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
    #   .. ..$ a: num 1
    #   .. ..$ b: num 2
    #  $ b:List of 2
    #   ..$ b_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
    #   .. ..$ a: num 1
    #   .. ..$ b: num 2
    #   ..$ b_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
    #   .. ..$ a: num 1
    #   .. ..$ b: num 2
    #  $ c:List of 2
    #   ..$ c_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
    #   .. ..$ a: num 1
    #   .. ..$ b: num 2
    #   ..$ c_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
    #   .. ..$ a: num 1
    #   .. ..$ b: num 2
    
    invisible( Map(writexl::write_xlsx, datspl, paste0(names(datspl), ".xlsx")) )
    
    list.files(pattern = "xlsx$")
    # [1] "a.xlsx" "b.xlsx" "c.xlsx"
    readxl::excel_sheets("a.xlsx")
    # [1] "a_1" "a_2" "a_3"
    

    FYI: if you need to remove the leading a_ from the sheet names, just change the name of dats before splitting it, perhaps

    # ...
    names(dats) <- sub("^[^_]*_", "", sheetnames)
    datspl <- split(dats, newxlsxfiles)
    # ...