I have a list containing 150 different data frames (DFs), each formatted exactly like the sample table below:
Date/Time    Depth  Pressure
2022-01-09   65     70
2022-08-09   65     NA
2023-01-09   65     68
2023-08-09   65     69
Each DF in the list is named after the location the data was collected from (i.e. Well A, Well B, etc.). I need to write each DF out to its own separate Excel file, with the DF's name used as the name of the Excel file.
I loaded my Excel file into a list using the following code:
library(readxl)

sheet_names <- excel_sheets("./Data.xlsx") # Gather all sheet names
list_all <- lapply(sheet_names, function(x) { # Read all sheets into a list
  as.data.frame(read_xlsx("./Data.xlsx", sheet = x, range = "K2:M200"))
})
names(list_all) <- sheet_names # Name each list element after its sheet
However, for this task the output files need a specific set of 30 columns with specific names: a "DateTime" column holding the Date/Time values, a "Depth" column holding the Depth values, and a "Pressure" column holding the Pressure values. The remaining 27 columns stay empty; they only need to exist for the files to upload to our database correctly.
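Roughly, the reshaping step I have in mind looks like the sketch below; "Col4" through "Col30" are placeholders for the real column names in our database template, and it assumes the three imported columns are Date/Time, Depth and Pressure, in that order:

# Sketch only: build a 30-column upload template and fill in the three columns we have
template_cols <- c("DateTime", "Depth", "Pressure", paste0("Col", 4:30))

to_template <- function(df) {
  out <- as.data.frame(matrix(NA, nrow = nrow(df), ncol = length(template_cols)))
  names(out) <- template_cols
  out$DateTime <- df[[1]] # Date/Time values
  out$Depth    <- df[[2]] # Depth values
  out$Pressure <- df[[3]] # Pressure values
  out
}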
I have used openworkbook() and write.xlsx() in a very limited capacity, but from my recollection they just copied the DF into an Excel file that I then had to save manually.
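Looking at the docs again, openxlsx::write.xlsx() seems to write straight to a file path, so the manual-saving part may not be needed; a minimal example (the list element name is just illustrative):

library(openxlsx)
# Writes the data frame directly to disk; no interactive saving step
write.xlsx(list_all[["Well A"]], file = "Well A.xlsx")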
I am having trouble finding examples or similar questions to solve this problem. Any help is greatly appreciated!
To summarise: I am trying to create a script that can take the single Excel file, turn it into a list of DFs, clean those DFs up, and then export them into individual Excel files for upload.
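Putting those pieces together, something like the following sketch is what I'm aiming for (to_template() is the placeholder reshaping function sketched above, not an existing function):

library(openxlsx)

# Sketch: reshape each DF into the upload template and write it out,
# using the DF's name (the location/sheet name) as the file name
for (nm in names(list_all)) {
  clean_df <- to_template(list_all[[nm]])          # placeholder reshaping step
  write.xlsx(clean_df, file = paste0(nm, ".xlsx")) # e.g. "Well A.xlsx"
}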
Is this helpful?
#Load libraries
library(tidyverse)
library(writexl)
library(fs)
library(lubridate)
#Create a place to stash your outputs
output_dir_path <- tempfile()
output_dir_path2 <- tempfile()
#We named it above, but now we need to create it
dir_create(output_dir_path)
dir_create(output_dir_path2)
#Create an example dataframe
test_data <- tibble(
  date_time = seq(ymd_hms('2012-04-07 00:00:00'), ymd_hms('2013-03-22 00:00:00'), by = '1 week'),
  depth = rnorm(50),
  pressure = rnorm(50),
  random_col_to_ignore = 1:50,
  site = rep(x = c("a", "b", "c", "d", "e"), 10) |> sort()
)
#Edit it, break it up, and write them out
test_data |>
  #Make any edits you like before the "walk2()" function.
  #In this case, I'm just removing a bogus column.
  #select(-random_col_to_ignore) |>
  #Nesting is much cleaner than lists, in my opinion.
  #Much easier to work with:
  nest_by(site) %>% #Need magrittr pipe, which is fancier than base pipe, to pull next step off:
  #Then, walk through the data and write it out:
  {walk2(.x = .$site,
         .y = .$data,
         .f = function(site, data){
           write_xlsx(x = data,
                      path = str_c(output_dir_path, "/site_", site, ".xlsx"))
         })}
#Open up the directory created by the script to view your output!
#Helpful touch, I know :)
#file_show(output_dir_path)
#I wasn't clear on what you wanted, so if you want the other way around,
#let's just start with what we just output:
test_data_list <- dir_ls(output_dir_path) |>
  tibble(file_path = _) |>
  mutate(
    #Extract the site name out of the string:
    site = str_extract(string = file_path, pattern = "(?<=site_).*(?=\\.xlsx)"),
    #Read in the data using the file path
    data = map2(.x = file_path,
                .y = site,
                .f = function(file_path, site){
                  readxl::read_xlsx(path = file_path) |>
                    #Add in the site col:
                    mutate(site = site) |>
                    #Remove NAs from pressure:
                    tidyr::drop_na(pressure) %>%
                    #Change everything but date_time, depth, pressure to NA
                    #(also, if you put "" instead of NA, it will be a blank; whatever you want)
                    mutate(across(.cols = -c(date_time, depth, pressure, site),
                                  .fns = function(.x){
                                    NA
                                  }))
                })
  ) %>%
  #Then, walk through the data and write it out:
  {walk2(.x = .$site,
         .y = .$data,
         .f = function(site, data){
           write_xlsx(x = data %>% select(-site),
                      path = str_c(output_dir_path2, "/site_", site, ".xlsx"))
         })}
file_show(output_dir_path2)