I am trying to extract relevant information from .txt files where the output is an excel table with the necessary information collected in relevant columns. Since the .txt file is converted from a html file, I have to use steps to clean the information I need into a dataframe. I can do this with one file and now I want to use a loop to loop through each file in the folder and extract the information as per my code and then save it as an excel table!
Here is the code which works for a single file from the folder:
# list all the file names of the specified pattern
fnames <- list.files(path = paste0(wd,"09_data_tables/Raster_layer_statistics"),full.names = T)
#view(fnames)
# read .txt files and extract relevant information into data table
my_tibble <- read_delim(XX.txt,delim = ":", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE, skip = 1)
# convert tibble to dataframe
my_df <- as.data.frame(my_tibble)
# remove characters from column 1
my_df$X1 <- gsub('<p>', '', my_df$X1)
# remove characters from column 2
my_df$X2 <- str_sub(my_df$X2, end = -5)
# renames columns on 'my_df'
colnames(my_df) <- c("Attribute", "Value","X")
# copy column 3 information to column 2 of first row
my_df[1,3] <-substring(my_df[1,3],61)
my_df[1,3] <- gsub("\\..*","",my_df[1,3])
my_df[1,2] <- my_df[1,3]
my_df <- my_df[,1:2]
str(my_df)
my_df_wide <- my_df %>% pivot_wider(names_from = Attribute, values_from = Value)
# Check columns classes
sapply(my_df_wide, class)
# convert character to numeric for all columns with numbers
cols <- names(my_df_wide)[2:9]
my_df_wide[cols] <- lapply(my_df_wide[cols], as.numeric)
my_df_wide <- as.data.frame(my_df_wide)
my_df_wide$Flight <- as.numeric(gsub("([0-9]+).*$", "\\1", my_df_wide$`Analyzed file`))
my_df_wide$`Analyzed file` <- sub("^[^_]*_", "", my_df_wide$`Analyzed file`)
# write dataframe to excel table
write_xlsx(my_df_wide,"10_results/raster_layer_statistics.xlsx")
I would use a list rather than a loop for this.
filename
parameter instead of XX.txt
.lapply
to apply this to all your files and create a list of data framesNOTE: I'm assuming your text file names will work as valid excel sheet names, if not you'll have to process the sheet_names
variable a bit.
Also, apologies in advance for any typos - without a reproducible example it's hard to be sure I've gotten it exactly right.
# list all the file names of the specified pattern
fnames <- list.files(path = paste0(wd,"09_data_tables/Raster_layer_statistics"),full.names = T)
#view(fnames)
# repeat without folders to get sheet names
sheet_names <- list.files(path = paste0(wd,"09_data_tables/Raster_layer_statistics"),full.names = FALSE)
# function to read one file
read_one_txt <- function(filename) {
my_tibble <- read_delim(filename, delim = ":", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE, skip = 1)
# convert tibble to dataframe
my_df <- as.data.frame(my_tibble)
# remove characters from column 1
my_df$X1 <- gsub('<p>', '', my_df$X1)
# remove characters from column 2
my_df$X2 <- str_sub(my_df$X2, end = -5)
# renames columns on 'my_df'
colnames(my_df) <- c("Attribute", "Value","X")
# copy column 3 information to column 2 of first row
my_df[1,3] <-substring(my_df[1,3],61)
my_df[1,3] <- gsub("\\..*","",my_df[1,3])
my_df[1,2] <- my_df[1,3]
my_df <- my_df[,1:2]
str(my_df)
my_df_wide <- my_df %>% pivot_wider(names_from = Attribute, values_from = Value)
# Check columns classes
sapply(my_df_wide, class)
# convert character to numeric for all columns with numbers
cols <- names(my_df_wide)[2:9]
my_df_wide[cols] <- lapply(my_df_wide[cols], as.numeric)
my_df_wide <- as.data.frame(my_df_wide)
my_df_wide$Flight <- as.numeric(gsub("([0-9]+).*$", "\\1", my_df_wide$`Analyzed file`))
my_df_wide$`Analyzed file` <- sub("^[^_]*_", "", my_df_wide$`Analyzed file`)
return(my_df_wide)
}
# apply this to the list of filenames
all_data_frames <- lapply(fnames, read_one_txt)
# add the names
names(all_data_frames) <- sheet_names
# write dataframes to excel table
write_xlsx(all_data_frames, "10_results/raster_layer_statistics.xlsx")