Search code examples
rmergedataset

How to combine two workbooks containing multiple worksheets having the same worksheets name in each workbooks together in R?


I have two workbooks where both have the same number of worksheets and share the same worksheet name. In each worksheet of either workbooks, they have the same column name but the strings in each row of workbook 1 is different to that of workbook 2. How to combine the different strings together in each worksheets into the worksheets of the new workbooks with R?

Below is the code that I have tried but unsuccessful:

output_folder_for_xlsx1 <- "replace with folder path for xlsx1"  
output_folder_for_xlsx2 <- "replace with folder path for xlsx2"
combined_output_folder  <- "replace with folder path for combined xlsx"

output_file_from_xlsx1 <- file.path(output_folder_for_xlsx1, "xlsx1.xlsx")
output_file_from_xlsx2 <- file.path(output_folder_for_xlsx2, "xlsx2.xlsx")

wb_combined <- createWorkbook()

wb1 <- loadWorkbook(output_file_from_xlsx1)
for (sheet_name in getSheetNames(wb1)) {
  sheet_data <- read.xlsx(output_file_from_xlsx1, sheet = sheet_name)
  addWorksheet(wb_combined, sheetName = sheet_name)
  writeData(wb_combined, sheet = sheet_name, sheet_data)
}

wb2 <- loadWorkbook(output_file_from_xlsx2)
for (sheet_name in getSheetNames(wb2)) {
  sheet_data <- read.xlsx(output_file_from_xlsx2, sheet = sheet_name)
  addWorksheet(wb_combined, sheetName = sheet_name)
  writeData(wb_combined, sheet = sheet_name, sheet_data)
}

output_combined_file <- file.path(combined_output_folder, "combined_workbook.xlsx")
saveWorkbook(wb_combined, file = output_combined_file, overwrite = TRUE)

it generates error Error in file.exists(file): invalid 'file' argument when I ran

for (sheet_name in getSheetNames(wb1)) {
  sheet_data <- read.xlsx(output_file_from_xlsx1, sheet = sheet_name)
  addWorksheet(wb_combined, sheetName = sheet_name)
  writeData(wb_combined, sheet = sheet_name, sheet_data)
}

or

for (sheet_name in getSheetNames(wb2)) {
  sheet_data <- read.xlsx(output_file_from_xlsx, sheet = sheet_name)
  addWorksheet(wb_combined, sheetName = sheet_name)
  writeData(wb_combined, sheet = sheet_name, sheet_data)
}

Can someone help please? Thanks.

Link provided are example of Workbook1, Workbook2 and the expected combined workbook.

Adding snapshot below: Workbook1 enter image description here

Workbook2 enter image description here

combined workbook enter image description here

This is another code I have tried (generated by chatgpt) and same error when looping:

library(openxlsx)
library(readr)

# Paths to the input workbooks
first_workbook_path <- "Workbook1.xlsx"
second_workbook_path <- "Workbook2.xlsx"

# Load worksheets from the first workbook
worksheets_first <- loadWorkbook(first_workbook_path)

# Load worksheets from the second workbook
worksheets_second <- loadWorkbook(second_workbook_path)

# Create a new workbook to store the combined data
combined_workbook <- createWorkbook()

# Loop through each worksheet in both workbooks
for (sheet_name in getSheetNames(worksheets_first)) {
  # Read data from the first workbook's worksheet
  data_first <- read.xlsx(worksheets_first, sheet = sheet_name)

  # Read data from the second workbook's worksheet
  data_second <- read.xlsx(worksheets_second, sheet = sheet_name)

  # Combine the data from both workbooks under the same worksheet name
  combined_data <- rbind(data_first, data_second)

  # Add the combined data to the new workbook
  addWorksheet(combined_workbook, sheetName = sheet_name)
  writeData(combined_workbook, sheet = sheet_name, combined_data)
}

# Path to the output combined workbook
output_combined_path <- "combined_workbook.xlsx"

# Save the combined workbook
saveWorkbook(combined_workbook, output_combined_path, overwrite = TRUE)

Solution

  • This will do what you want to achieve:

    setwd("C:/Users/yourname/Downloads")
    library(readxl)
    library(openxlsx)
    
    workbook1 <- read_excel("Workbook1.xlsx", sheet = NULL)
    sheets_1 <- excel_sheets("Workbook1.xlsx")
    
    workbook2 <- read_excel("Workbook2.xlsx", sheet = NULL)
    sheets_2 <- excel_sheets("Workbook2.xlsx")
    
    combined_wb <- createWorkbook()
    
    
    for (sheet_name in sheets_1) { 
    
      sheet_data_1 <- read_excel("Workbook1.xlsx", sheet = sheet_name)
      
      addWorksheet(combined_wb, sheetName = table_name)
      
      writeData(combined_wb, sheet = table_name, x = sheet_data_1, startRow = 1, startCol = 1)
      
      if (sheet_name %in% sheets_2) {
        sheet_data_2 <- read_excel("Workbook2.xlsx", sheet = sheet_name)
       
        writeData(combined_wb, sheet = table_name, x = sheet_data_2, startRow = nrow(sheet_data_1) + 2, startCol = 1)
      }
    }
    
    saveWorkbook(combined_wb, "Combined_Workbook.xlsx")