r parallel-processing raster r-raster doparallel

Rasters not written to list when parallel procesing

I might be doing this all wrong, but here's what want to do... I have two big rasters I need to correct. As R is working only on one core, I want to use parallel procesing. So I split both rasters to smaller rasters, on which I want to perform my tasks. Afterwards I would merge smaller rasters into one.

My problem is, that when in foreach loop, new objects should be created - I save them to a list. But the lists remain empty.

library(raster)
library(rgdal)
library(SpaDES)
library(doParallel)


dmp <- raster("exampe1.tif"))
luc <- raster("example2.sdat"))

UseCores <- parallel::detectCores()-1  #Define how many cores you want to use
cl       <- makeCluster(UseCores)      #Register CoreCluster
registerDoParallel(cl)

dmp_s <- splitRaster(dmp, nx= 10, ny = 10)   #Split raster to smaller rasters
luc_s <- splitRaster(luc, nx= 10, ny = 10)   #Split raster to smaller rasters

dmp_s_copy <- dmp_s
luc_s_copy <- luc_s


dmp1 <- vector(mode = 'list', length = length(dmp_s))
m <- vector(mode = 'list', length = length(dmp_s))
dmp2 <- vector(mode = 'list', length = length(dmp_s))
dmp_list <- vector(mode = 'list', length = length(dmp_s))
dmp_stack <- vector(mode = 'list', length = length(dmp_s))
dmp_final1 <- vector(mode = 'list', length = length(dmp_s))

foreach(i=1:length(dmp_s)) %dopar% {
  dmp1[i] <- dmp_s[[i]] * luc_s[[i]]
  
  m[[i]] <- matrix(1, ncol=5, nrow=5)
  dmp2[[i]] <- raster::focal(dmp1[[i]], w=m[[i]], fun=max, na.rm=TRUE, pad=TRUE, NAonly=TRUE)
  
  dmp_list[[i]] <- list(dmp_s[[i]], dmp2[[i]])
  dmp_stack[[i]] <- stack(dmp_list[[i]])
  
  dmp_final1[[i]] <- min(dmp_stack[[i]])
  
}

dmp_final <- mergeRaster(dmp_final1)

The code works if I don't use parallel processing, but I need it. How do I make it work?

Solution

I think that may be because of the several list positions launched into the parallel clusters. Dependingo on you OS this can lead into issues. In this case, I'm using "fork" type (just in linux) to save some memory, but if you are using windows just remplace "FORK" by "PSOCK". Anyway, I implemented two thigs:

A faster raster splitting function.
A tidy process in parallel (avoiding intermediate objects that can cause memory issues).

Here is the code:

library(raster)
library(rgdal)
library(SpaDES)
library(doParallel)
library(mapview)

# read data
dmp <- raster("./data/example1.tif")
luc <- raster("./data/example2.tif")

# split rasters (this is slow)
# dmp_s <- splitRaster(dmp, nx= 100, ny = 100)   #Split raster to smaller rasters
# luc_s <- splitRaster(luc, nx= 100, ny = 100)   #Split raster to smaller rasters

# Function to split rasters (adapted from https://stackoverflow.com/questions/29784829/r-raster-package-split-image-into-multiples)
SplitRas <- function(raster, split){
        h <- ceiling(ncol(raster)/split)
        v <- ceiling(nrow(raster)/split)
        agg <- aggregate(raster,fact=c(h,v))
        agg[] <- 1:ncell(agg)
        agg_poly <- rasterToPolygons(agg)
        names(agg_poly) <- "polis"
        r_list <- list()
        for(i in 1:ncell(agg)){
                e1 <- extent(agg_poly[agg_poly$polis==i,])
                r_list[[i]] <- crop(raster,e1)
        }
        return(r_list)
}

# Apply function to split rasters
dmp_s <- SplitRas(dmp, 10)
luc_s <- SplitRas(luc, 10)

# start cluster & process
UseCores <- parallel::detectCores()-1  #Define how many cores you want to use
cl       <- makeCluster(UseCores, type="FORK")      #Register CoreCluster
doParallel::registerDoParallel(cl)

dmp_final1 <- foreach(i=1:length(dmp_s)) %dopar% {
        x1 <- dmp_s[[i]] * luc_s[[i]]
        m <- matrix(1, ncol=5, nrow=5)
        x2 <- raster::focal(x1, w=m, fun=max, na.rm=TRUE, pad=TRUE, NAonly=TRUE)
        min(stack(list(x1, x2)))
}

# stop cluster
stopCluster(cl)

# merge products
dmp_final <- mergeRaster(dmp_final1)

# showmap
mapview(dmp_final)