Determine polygon coordinates from custom txt map

I have a .txt file that looks like this

+----------+--------+------------------+
|          |         \                 |
|          |          \                |
|          |           \               |
|          |            +------+-------+
|          |                   |       |
|          |                   |       |
+----------+-------------------+-------+

How can I identify these polygons?

For this example, I expect to identify 4 polygons.

I can read the lines and identify, for example, the coordinates (row, col) for every vertice ("+"), but I'm not sure how to get the edges coordinates. My main goal here is to have a set of points describing every polygons' bounding regions.

How can I do this in R or Python?

Solution

I'm not claiming this is very efficient, but it's at least one way to tackle the problem in R. First, here's how i've read in the data.

raw <- r"(
+----------+--------+------------------+
|          |         \                 |
|          |          \                |
|          |           \               |
|          |            +------+-------+
|          |                   |       |
|          |                   |       |
+----------+-------------------+-------+
)"

data <- do.call("rbind", strsplit(readLines(textConnection(raw)), ""))

This creates an array data that has the same number of rows and columns as the input with all of the characters of the input.

The basic idea is that I try to fill each of the "empty" areas in the region and keep track of all the corners that we meet during the fill. So first, a helper function to get the rows/columns around a given cell

allnei <- expand.grid(-1:1, -1:1)
allnei <- allnei[allnei[,1] !=0 | allnei[,2] !=0, ]
get_neighbors <- function(point, data) {
  cand <- t(c(point) +t(allnei))
  cand[
    cand[,1]>=1 & cand[,1]<=nrow(data) & 
    cand[,2]>=1 & cand[,2]<=ncol(data), 
    TRUE
  ]
}

and then most of the work happens on the fill function

fill <- function(point, data, corners=matrix(nrow=0, ncol=2), fill_with="1") {
  if (!is.matrix(point)) {
    point <- matrix(point, ncol=2)
  }
  cand <- get_neighbors(point, data)
  data[point] <- fill_with
  corner_cand <- which(data[cand] == "+")
  if (length(corner_cand)) {
    corners <- unique(rbind(corners, cand[corner_cand, ]))  
  }
  empty_cand <- which(data[cand] == " "  & (cand[,1]==point[,1] | cand[,2]==point[,2]))
  if (length(empty_cand)>0) {
    for(i in empty_cand) {
      result <- fill(cand[i,], data, corners, fill_with)
      data <- result$data
      corners <- unique(rbind(corners, result$corners))
    }
  }
  list(data=data, corners=corners)
}

So starting with an "empty" cell, we look for adjacent cells and mark them as the same region. We do this recursively until we run out of adjacent empty cells.

Now we need a wrapper that will keep doing this until there are no empty regions.

find_regions <- function(data) {
  regions <- list()
  reg <- 1
  empty <- which(data==" ", arr.ind=TRUE)
  while(nrow(empty)>0) {
    empty <- empty[1,]
    results <- fill(empty, data, fill_with=reg)    
    regions <- c(regions, list(results$corners))
    data <- results$data
    reg <- reg+1
    empty <- which(data==" ", arr.ind=TRUE)
  }
  list(data=data, regions=regions)
}
result <- find_regions(data)
result$regions

Which gives the output

[1]]
     Var1 Var2
[1,]    1    1
[2,]    8    1
[3,]    8   12
[4,]    1   12

[[2]]
     Var1 Var2
[1,]    1   12
[2,]    8   12
[3,]    1   21
[4,]    5   25
[5,]    5   32
[6,]    8   32

[[3]]
     Var1 Var2
[1,]    5   25
[2,]    5   32
[3,]    5   40
[4,]    1   40

[[4]]
     Var1 Var2
[1,]    5   32
[2,]    8   32
[3,]    5   40
[4,]    8   40

where are all the row/column positions for the vertices of the polygons. And we can see the regions with

cat(paste(apply(result$data, 1, paste, collapse=""), collapse="\n"))
# +----------+--------+------------------+
# |1111111111|222222222\33333333333333333|
# |1111111111|2222222222\3333333333333333|
# |1111111111|22222222222\333333333333333|
# |1111111111|222222222222+------+-------+
# |1111111111|2222222222222222222|4444444|
# |1111111111|2222222222222222222|4444444|
# +----------+-------------------+-------+

I also considered extracting the list of connected corners. This method basically looks for all the corners, looks in the directions where a "wall" or connector is present and then finds the first corner in that direction. We then keep track of all the discovered pairs. So first, here's a helper function to find "connected" corners

find_hit <- function(corner, corners, rvec, cvec) {
  best <- NA
  best_dist <- Inf
  for(i in 1:nrow(corners)) {
    rdist <- corners[i, 1] - corner[,1]
    cdist <- corners[i, 2] - corner[,2]
    if(sign(rdist) == sign(rvec) & sign(cdist) == sign(cvec) & 
      ((rvec ==0 | cvec== 0) | (rdist%/%rvec ==cdist %/%cvec))) {
      dist <- rdist + cdist
      if (dist < best_dist) {
        best_dist <- dist
        best <- i
      }
    }
  }
  best
}

The idea is that given a point and a list of candidate points, we look along a particular "vector" to see of there is a point at the end of that direction and return the first hit. We also have a number helper to make sure the points we are looking at are "in bounds"

in_bounds <- (function(data) {
  function(r, c) {
    r > 0 & r <= nrow(data) & c > 0 & c <= ncol(data)
  }
})(data)

Now we find all the corners and loop through them to find the connections

corners <- which(data=="+", arr.ind = TRUE)
edges <- list()
for (i in 1:(nrow(corners)-1)) {
  corner <- corners[i, , drop=FALSE]
  rest <- corners[-(1:i), , drop=FALSE]
  r <- corner[, 1]
  c <- corner[, 2]
  for (dr in c(-1, 0, 1)) {
    for (dc in c(-1, 0, 1)) {
      if (in_bounds(r+dr, c+dc) & (dr != 0| dc != 0) && data[r+dr, c+dc]!=" ") {
        hit <- find_hit(corner, rest, dr, dc)
        if (!is.na(hit)) {
          edges <- c(edges, list(c(i, i+hit)))
        } 
      }
    }
  }
}
edges

So edges is a list telling which corners are connected. We can turn this into an igraph object with

library(igraph)
dd <- as.data.frame(do.call(rbind, edges))
dd[] <- lapply(dd, as.character)
g <- graph_from_data_frame(dd, directed = FALSE)
vnames <- as.character(1:nrow(corners))
V(g)[vnames]$row <- corners[,1]
V(g)[vnames]$col <- corners[,2]
plot(g)

And then you can get the row/column information out with

as.data.frame(vertex_attr(g))

And to put them in the right shape, you can set the x and y attributes (here I flip the y direction since (0,0) is lower left when plotting but top left in the matrix)

V(g)[vnames]$y <- -corners[,1]
V(g)[vnames]$x <- corners[,2]

plot(g)