Search code examples
rfunctionautomationraster

R: Create function that iteratively performs some analysis to pairs of rasters, based on their names


I am having 2 sets of raster data and their names are:

ntl_'a number'.tif

pop_'a number'.tif

My goal is to create a function that reads the first pair of rasters (e.g., ntl_1.tif and pop_1.tif), then executes the below code and then repeats the process with the next pair:

library(raster)
library(DescTools)

#create a data.frame of values from the NTL and pop raster data
ntl = raster("path/ntl_1.tif")
vals_ntl <- as.data.frame(values(ntl))
ntl_coords = as.data.frame(xyFromCell(ntl, 1:ncell(ntl)))
combine <- as.data.frame(cbind(ntl_coords,vals_ntl))

pop<-raster("path/pop_1.tif")
pop = resample(pop, ntl, method = 'bilinear')
vals_pop <- as.data.frame(values(pop))

block.data <- as.data.frame(cbind(combine, vals_pop))

names(block.data)[3] <- "ntl"
names(block.data)[4] <- "pop"

block.data <- na.omit(block.data)

block.data = subset(block.data, select = -c(x, y))

# sort by ntl
block.data <-block.data[order(block.data$ntl),]

ntl_vector <- block.data[ , "ntl"]
pop_vector <- block.data[ , "pop"]

#compute gini index
Gini(ntl_vector, pop_vector, unbiased = FALSE)

My issue is with the code inside the function, I do not know how to properly make the syntax (the above code is for a pair of raster while I have hundreds of pairs). Hopefully I can get the results (i.e., the gini coefficient) of every pair in my console or, even better, in a data.frame. The data are here.


Solution

  • library(purrr)
    library(fs)
    
    raster_gini <- function(
        .ntl = "ntl_1.tif", 
        .pop = "pop_1.tif",
        .rdgal = TRUE
    ) {
      
      if(.rdgal) {
        
        ntl = raster(.ntl)
        vals_ntl <- as.data.frame(values(ntl))
        ntl_coords = as.data.frame(xyFromCell(ntl, 1:ncell(ntl)))
        combine <- as.data.frame(cbind(ntl_coords,vals_ntl))
        
        pop<-raster(.pop)
        pop = resample(pop, ntl, method = 'bilinear')
        vals_pop <- as.data.frame(values(pop))
        
        block.data <- as.data.frame(cbind(combine, vals_pop))
        
        #rename the columns
        names(block.data)[3] <- "ntl"
        names(block.data)[4] <- "pop"
        
        #remove NA values
        block.data <- na.omit(block.data)
        
        #remove the columns x & y
        block.data = subset(block.data, select = -c(x, y))
        
        # sort by ntl
        block.data <-block.data[order(block.data$ntl),]
        
        ntl_vector <- block.data[ , "ntl"]
        pop_vector <- block.data[ , "pop"]
        
        #compute gini index
        gini <- Gini(ntl_vector, pop_vector, unbiased = FALSE)
        
        c(ntl = .ntl, pop = .pop, gini = gini)
        
      } else {
        c(ntl = .ntl, pop = .pop)
      }
      
    }
    
    
    doc_paths_ntl <- fs::dir_ls("path_to_ntl_raster", glob = "*tif*") 
    
    doc_paths_pop <- fs::dir_ls("path_to_pop_raster", glob = "*tif*") 
    
    
    result_df <- purrr::map2_dfr(.x = doc_paths_ntl, .y = doc_paths_pop, .f = raster_gini)
    
    
    result_df <- result_df |> 
      dplyr::mutate(ntl = basename(ntl)) |> 
      dplyr::mutate(pop = basename(pop))
    
    result_df