I have two workbooks where both have the same number of worksheets and share the same worksheet name. In each worksheet of either workbooks, they have the same column name but the strings in each row of workbook 1 is different to that of workbook 2. How to combine the different strings together in each worksheets into the worksheets of the new workbooks with R?
Below is the code that I have tried but unsuccessful:
output_folder_for_xlsx1 <- "replace with folder path for xlsx1"
output_folder_for_xlsx2 <- "replace with folder path for xlsx2"
combined_output_folder <- "replace with folder path for combined xlsx"
output_file_from_xlsx1 <- file.path(output_folder_for_xlsx1, "xlsx1.xlsx")
output_file_from_xlsx2 <- file.path(output_folder_for_xlsx2, "xlsx2.xlsx")
wb_combined <- createWorkbook()
wb1 <- loadWorkbook(output_file_from_xlsx1)
for (sheet_name in getSheetNames(wb1)) {
sheet_data <- read.xlsx(output_file_from_xlsx1, sheet = sheet_name)
addWorksheet(wb_combined, sheetName = sheet_name)
writeData(wb_combined, sheet = sheet_name, sheet_data)
}
wb2 <- loadWorkbook(output_file_from_xlsx2)
for (sheet_name in getSheetNames(wb2)) {
sheet_data <- read.xlsx(output_file_from_xlsx2, sheet = sheet_name)
addWorksheet(wb_combined, sheetName = sheet_name)
writeData(wb_combined, sheet = sheet_name, sheet_data)
}
output_combined_file <- file.path(combined_output_folder, "combined_workbook.xlsx")
saveWorkbook(wb_combined, file = output_combined_file, overwrite = TRUE)
it generates error Error in file.exists(file): invalid 'file' argument
when I ran
for (sheet_name in getSheetNames(wb1)) {
sheet_data <- read.xlsx(output_file_from_xlsx1, sheet = sheet_name)
addWorksheet(wb_combined, sheetName = sheet_name)
writeData(wb_combined, sheet = sheet_name, sheet_data)
}
or
for (sheet_name in getSheetNames(wb2)) {
sheet_data <- read.xlsx(output_file_from_xlsx, sheet = sheet_name)
addWorksheet(wb_combined, sheetName = sheet_name)
writeData(wb_combined, sheet = sheet_name, sheet_data)
}
Can someone help please? Thanks.
Link provided are example of Workbook1, Workbook2 and the expected combined workbook.
Adding snapshot below:
Workbook1
This is another code I have tried (generated by chatgpt) and same error when looping:
library(openxlsx)
library(readr)
# Paths to the input workbooks
first_workbook_path <- "Workbook1.xlsx"
second_workbook_path <- "Workbook2.xlsx"
# Load worksheets from the first workbook
worksheets_first <- loadWorkbook(first_workbook_path)
# Load worksheets from the second workbook
worksheets_second <- loadWorkbook(second_workbook_path)
# Create a new workbook to store the combined data
combined_workbook <- createWorkbook()
# Loop through each worksheet in both workbooks
for (sheet_name in getSheetNames(worksheets_first)) {
# Read data from the first workbook's worksheet
data_first <- read.xlsx(worksheets_first, sheet = sheet_name)
# Read data from the second workbook's worksheet
data_second <- read.xlsx(worksheets_second, sheet = sheet_name)
# Combine the data from both workbooks under the same worksheet name
combined_data <- rbind(data_first, data_second)
# Add the combined data to the new workbook
addWorksheet(combined_workbook, sheetName = sheet_name)
writeData(combined_workbook, sheet = sheet_name, combined_data)
}
# Path to the output combined workbook
output_combined_path <- "combined_workbook.xlsx"
# Save the combined workbook
saveWorkbook(combined_workbook, output_combined_path, overwrite = TRUE)
This will do what you want to achieve:
setwd("C:/Users/yourname/Downloads")
library(readxl)
library(openxlsx)
workbook1 <- read_excel("Workbook1.xlsx", sheet = NULL)
sheets_1 <- excel_sheets("Workbook1.xlsx")
workbook2 <- read_excel("Workbook2.xlsx", sheet = NULL)
sheets_2 <- excel_sheets("Workbook2.xlsx")
combined_wb <- createWorkbook()
for (sheet_name in sheets_1) {
sheet_data_1 <- read_excel("Workbook1.xlsx", sheet = sheet_name)
addWorksheet(combined_wb, sheetName = table_name)
writeData(combined_wb, sheet = table_name, x = sheet_data_1, startRow = 1, startCol = 1)
if (sheet_name %in% sheets_2) {
sheet_data_2 <- read_excel("Workbook2.xlsx", sheet = sheet_name)
writeData(combined_wb, sheet = table_name, x = sheet_data_2, startRow = nrow(sheet_data_1) + 2, startCol = 1)
}
}
saveWorkbook(combined_wb, "Combined_Workbook.xlsx")