raster fill image-segmentation r-raster connected-components

moving-window raster flood fill in R

Suppose I have a raster with integer values describing the timing of an event as day of year (DOY). If there was no event in the respective year, cells are set to NA. The clump() function of the R 'raster' package would allow to detect adjacent cells of the same integer value and label them with a unique ID. Now, imagine such events (e.g. a fire) can spread in space over time, so that cell (x, y) burned on DOY 1 and the neighbouring cells (e.g. (x+1, y), (x, y+1),...) then burned on DOY 2. Hence, I'd like to identify such events where neighbouring pixels burned within a DOY difference of maximum of 2 days (e.g. DOY(x,y)=13 and DOY(x+1,y)=15) and assign these with a unique ID:

library(raster)
m<-matrix(c(1,10,11,14,
            2,2,13,NA,
            20,3,25,NA,
            21,25,7,NA), ncol=4, byrow = TRUE)
r<-raster(m) # raster object of matrix

Should yield a raster:

res_m<-matrix(c(1,2,2,2,
                1,1,2,NA,
                3,1,4,NA,
                3,4,5, NA), ncol=4, byrow = TRUE)
res_r<-raster(res_m)

Or graphically:

par(mfrow=c(1,2))
plot(r, xlim=(c(0:1)), main="DOY")
text(r)
plot(res_r, xlim=(c(0:1)), main="classified result")
text(res_r)

plot: initial DOY raster (left) vs. classified result (right)

EDIT: Referring to Lorenzo's comment: events, where propagation is e.g. DOY1, DOY2 and DOY4 should be treated as one event. However, I cannot figure out how an algorithm could look like, where two different events "melt" as they propagate, but would still be classified as two different events. So far, I solved the problem rather inefficient as follows:

#Round 1: find connected components

#cell indices
coli<-rep(1:ncol(r), nrow(r)) 
rowi<-rep(1:ncol(r), each= nrow(r)) 

#neighbourhood matrix (considering only NW, N, NE and W neighbours)
mat_nb <- matrix(c(1,1,1,
                   1,0,NA,
                   NA,NA,NA), nrow=3, ncol=3, byrow = T)

#create ascending class raster
cls<-1:ncell(r)
mcl<-setValues(r, cls)

#create empty raster to fill
ecl<-setValues(r, NA)

#loop through cells
for (j in 1:length(coli)){

  #####get adjacent cells 
  zelle<-cellFromRowCol(r, rowi[j], coli[j])
  nb <- adjacent(r, zelle, directions=mat_nb, pairs=F, sorted=T)

  if(is.na(r[zelle])) {next} # if cell=NA go to next cells
  if(length(nb) == 0) {ecl[zelle] <- mcl[zelle]} # if no neighbours, use ascending class value
  if(length(nb) > 0) {
    if(all(!is.na(r[nb[]]) & r[nb[]] %in% (r[zelle]-2):(r[zelle]+2) & !(unique(ecl[nb[]])))) 
      {ecl[zelle] <- ecl[nb[1]]}  # if all neighbours valid and from same class, assign to class
    if(!is.na(r[nb[1]]) & r[nb[1]] %in% (r[zelle]-2):(r[zelle]+2) & is.na(ecl[zelle])) 
      {ecl[zelle] <- ecl[nb[1]]} # if NW neighbour valid and zelle still unclassified, assign neighbour's class
    if(!is.na(r[nb[2]]) & r[nb[2]] %in% (r[zelle]-2):(r[zelle]+2) & is.na(ecl[zelle])) 
      {ecl[zelle] <- ecl[nb[2]]}  # same for N
    if(!is.na(r[nb[3]]) & r[nb[3]] %in% (r[zelle]-2):(r[zelle]+2) & is.na(ecl[zelle])) 
      {ecl[zelle] <- ecl[nb[3]]}  # same for NE
    if(!is.na(r[nb[4]]) & r[nb[4]] %in% (r[zelle]-2):(r[zelle]+2) & is.na(ecl[zelle])) 
      {ecl[zelle] <- ecl[nb[4]]} # same for W
    if(all(!(r[nb[]] %in% (r[zelle]-2):(r[zelle]+2)))) {ecl[zelle] <- mcl[zelle]} # if all neighbours "invalid", assign scending class value
  }
} # warnings: from pixels with less than 4 nbs

#compare result with initial raster
par(mfrow=c(1,2))
plot(r)
text(r)
plot(ecl)
text(ecl)

In Round 2, the connected component classes are combined.

##Round 2: combine classes

ecla<-ecl #save from first recursion

# only E, SW, S and SE neighbours
mat_agg<-matrix(c(NA,NA,NA,
                  NA,0,1,
                  1,1,1), nrow=3, ncol=3, byrow = T)

for (i in 1:length(coli)){

  #####get adjacent cells 
  zelle<-cellFromRowCol(r, rowi[i], coli[i])
  nb <- adjacent(r, zelle, directions=mat_agg, pairs=F, sorted=T)

  if(is.na(r[zelle])) {next}
  if(length(nb) == 0) {ecl[zelle] <- mcl[zelle]}
  if(length(nb) > 0) {
    if(r[nb[2]] %in% (r[zelle]-2):(r[zelle]+2) & ecla[zelle] < ecla[nb[2]]) {ecla[nb[2]] <- ecla[zelle]}
    if(r[nb[2]] %in% (r[zelle]-2):(r[zelle]+2) & ecla[zelle] > ecla[nb[2]]) {ecla[zelle] <- ecla[nb[2]]}
    if(r[nb[3]] %in% (r[zelle]-2):(r[zelle]+2) & ecla[zelle] < ecla[nb[3]]) {ecla[nb[3]] <- ecla[zelle]}
    if(r[nb[3]] %in% (r[zelle]-2):(r[zelle]+2) & ecla[zelle] > ecla[nb[3]]) {ecla[zelle] <- ecla[nb[3]]}
    if(r[nb[4]] %in% (r[zelle]-2):(r[zelle]+2) & ecla[zelle] < ecla[nb[4]]) {ecla[nb[4]] <- ecla[zelle]}
    if(r[nb[4]] %in% (r[zelle]-2):(r[zelle]+2) & ecla[zelle] > ecla[nb[4]]) {ecla[zelle] <- ecla[nb[4]]}
    if(r[nb[1]] %in% (r[zelle]-2):(r[zelle]+2) & ecla[zelle] < ecla[nb[1]]) {ecla[nb[1]] <- ecla[zelle]}
    if(r[nb[1]] %in% (r[zelle]-2):(r[zelle]+2) & ecla[zelle] > ecla[nb[1]]) {ecla[zelle] <- ecla[nb[1]]}
  } # warnings: from pixels with less than 4 nbs
}

# plot results
par(mfrow=c(1,3))
plot(ecl) # round 1 result
text(ecl)
plot(r)
text(r)
plot(ecla) # round 2 result
text(ecla)

Solution

This is a tricky old problem. I gave up with passing a complex function to raster::focal so I instead processed with data.table and used a series of rules. There are probably far simpler ways of doing this, but anyway.

This works for your data and a 6x6 raster I generated also. Please test it and see how it goes;

# packages
library(raster)
library(data.table)

# your example data
m<-matrix(c(1,10,11,14,
            2,2,13,NA,
            20,3,25,NA,
            21,25,7,NA), ncol=4, byrow = TRUE)
r<-raster(m)

# convert raster to data.table, add cell number attribute, and zonal ID column 
# and columns for the queen's case relationships (these columns contain the cell ID 
# that holds that relationship)
df <- as.data.table(as.data.frame(r))
df[,CELL:=as.numeric(row.names(df))]
df[,ID:=0]
df[,c("TL","T","TR","R","BR","B","BL","L") := 
   list(CELL-(ncol(r)+1),CELL-ncol(r),CELL-(ncol(r)-1),
   CELL+1,CELL+(ncol(r)+1),CELL+ncol(r),CELL+(ncol(r)-1),CELL-1)]

# set appropriate queen's case relations to NA for all edge cells
df[c(CELL %in% seq(1,ncell(r),ncol(r))), c("TL","L","BL")] <- NA
df[c(CELL %in% seq(ncol(r),ncell(r),ncol(r))), c("TR","R","BR")] <- NA
df[c(CELL %in% 1:ncol(r)), c("TL","T","TR")] <- NA
df[c(CELL %in% (ncell(r)-nrow(r)+1):ncell(r)), c("BL","B","BR")] <- NA

# melt data table, add the cell values that correspond to each relationship,
# remove rows that wont be needed just now
dfm <- melt(df,id.vars=c("layer","CELL","ID"))
dfm[,NEIGH.VAL := r[dfm[,value]]]
dfm[,WITHIN2:=ifelse(abs(layer-NEIGH.VAL)>2,F,T)]
dfm <- dfm[!is.na(value)&!is.na(layer)&!is.na(NEIGH.VAL)][order(CELL)]
dfm <- dfm[WITHIN2==TRUE]
dfm[,NEIGH.ID := 0]

# this is a tricky loop and any errors that may occur are more than likely produced here.
# It starts at the lowest cell ID with a value and builds a lookup up vector of cell IDs
# that conform to the DOY being within 2 days, then sets the ID for them all.
# It then moves on to the next cell ID available that has not had a zone ID assigned
# and does the same thing, with a zonal ID one higher. etc.
for(n in unique(dfm[,CELL])){
   while(nrow(dfm[CELL==n & NEIGH.ID==0]) > 0){
    lookups <- unique(dfm[CELL==n,value])
    while(all(unique(dfm[CELL %in% lookups,value]) == lookups)==F){
      lookups <- unique(c(lookups,dfm[CELL %in% lookups,value]))
      lookups <- unique(dfm[CELL %in% lookups,value])}
   dfm[CELL %in% lookups, NEIGH.ID := max(dfm[,NEIGH.ID])+1]
   }
}

# this section creates a raster of 1:ncell of the original data and assigns a zonal ID
# with reclassify. Everything that did not get a zonal ID (single 'island' cells
# and original NA cells) becomes NA
results <- unique(dfm[,list(CELL,NEIGH.ID)])
rzone <- r
rzone[] <- 1:ncell(rzone)
rc <- reclassify(rzone, results)
rc[!(rzone %in% results[[1]])] <- NA

# Now we need to determine those single 'island' cells and reinstate them, with their
# own ID, increasing incrementally from the highest extant ID based on the above analysis
final.vec <- which(!(rzone[] %in% results[[1]]) & !(rzone[] %in% which(is.na(values(r)))))

rc[final.vec] <- seq((cellStats(rc,max)+1),(cellStats(rc,max)+length(rc[final.vec])),1)

# plot to check
par(mfrow=c(1,3))
plot(r, xlim=(c(0:1)), main="DOY")
text(r)
plot(res_r, xlim=(c(0:1)), main="DOY")
text(res_r)
plot(rc, xlim=(c(0:1)), main="zones result")
text(rc)

this code also worked on the below, have a play around though, fingers crossed! (ignore warnings);

m<-matrix(c(1,3,5,14,NA,21,
            2,2,17,NA,23,25,
            9,15,4,5,9,NA,
            14,11,14,21,25,7,
            NA,NA,16,2,4,6,
            NA,17,25,15,1,NA), ncol=6, byrow = TRUE)
r<-raster(m) # raster object of matrix