Search code examples
pythonrcomputational-geometry

Determine polygon coordinates from custom txt map


I have a .txt file that looks like this

+----------+--------+------------------+
|          |         \                 |
|          |          \                |
|          |           \               |
|          |            +------+-------+
|          |                   |       |
|          |                   |       |
+----------+-------------------+-------+

How can I identify these polygons?

For this example, I expect to identify 4 polygons.

I can read the lines and identify, for example, the coordinates (row, col) for every vertice ("+"), but I'm not sure how to get the edges coordinates. My main goal here is to have a set of points describing every polygons' bounding regions.

How can I do this in R or Python?


Solution

  • I'm not claiming this is very efficient, but it's at least one way to tackle the problem in R. First, here's how i've read in the data.

    raw <- r"(
    +----------+--------+------------------+
    |          |         \                 |
    |          |          \                |
    |          |           \               |
    |          |            +------+-------+
    |          |                   |       |
    |          |                   |       |
    +----------+-------------------+-------+
    )"
    
    data <- do.call("rbind", strsplit(readLines(textConnection(raw)), ""))
    

    This creates an array data that has the same number of rows and columns as the input with all of the characters of the input.

    The basic idea is that I try to fill each of the "empty" areas in the region and keep track of all the corners that we meet during the fill. So first, a helper function to get the rows/columns around a given cell

    allnei <- expand.grid(-1:1, -1:1)
    allnei <- allnei[allnei[,1] !=0 | allnei[,2] !=0, ]
    get_neighbors <- function(point, data) {
      cand <- t(c(point) +t(allnei))
      cand[
        cand[,1]>=1 & cand[,1]<=nrow(data) & 
        cand[,2]>=1 & cand[,2]<=ncol(data), 
        TRUE
      ]
    }
    

    and then most of the work happens on the fill function

    fill <- function(point, data, corners=matrix(nrow=0, ncol=2), fill_with="1") {
      if (!is.matrix(point)) {
        point <- matrix(point, ncol=2)
      }
      cand <- get_neighbors(point, data)
      data[point] <- fill_with
      corner_cand <- which(data[cand] == "+")
      if (length(corner_cand)) {
        corners <- unique(rbind(corners, cand[corner_cand, ]))  
      }
      empty_cand <- which(data[cand] == " "  & (cand[,1]==point[,1] | cand[,2]==point[,2]))
      if (length(empty_cand)>0) {
        for(i in empty_cand) {
          result <- fill(cand[i,], data, corners, fill_with)
          data <- result$data
          corners <- unique(rbind(corners, result$corners))
        }
      }
      list(data=data, corners=corners)
    }
    

    So starting with an "empty" cell, we look for adjacent cells and mark them as the same region. We do this recursively until we run out of adjacent empty cells.

    Now we need a wrapper that will keep doing this until there are no empty regions.

    find_regions <- function(data) {
      regions <- list()
      reg <- 1
      empty <- which(data==" ", arr.ind=TRUE)
      while(nrow(empty)>0) {
        empty <- empty[1,]
        results <- fill(empty, data, fill_with=reg)    
        regions <- c(regions, list(results$corners))
        data <- results$data
        reg <- reg+1
        empty <- which(data==" ", arr.ind=TRUE)
      }
      list(data=data, regions=regions)
    }
    result <- find_regions(data)
    result$regions
    

    Which gives the output

    [1]]
         Var1 Var2
    [1,]    1    1
    [2,]    8    1
    [3,]    8   12
    [4,]    1   12
    
    [[2]]
         Var1 Var2
    [1,]    1   12
    [2,]    8   12
    [3,]    1   21
    [4,]    5   25
    [5,]    5   32
    [6,]    8   32
    
    [[3]]
         Var1 Var2
    [1,]    5   25
    [2,]    5   32
    [3,]    5   40
    [4,]    1   40
    
    [[4]]
         Var1 Var2
    [1,]    5   32
    [2,]    8   32
    [3,]    5   40
    [4,]    8   40
    

    where are all the row/column positions for the vertices of the polygons. And we can see the regions with

    cat(paste(apply(result$data, 1, paste, collapse=""), collapse="\n"))
    # +----------+--------+------------------+
    # |1111111111|222222222\33333333333333333|
    # |1111111111|2222222222\3333333333333333|
    # |1111111111|22222222222\333333333333333|
    # |1111111111|222222222222+------+-------+
    # |1111111111|2222222222222222222|4444444|
    # |1111111111|2222222222222222222|4444444|
    # +----------+-------------------+-------+
    

    I also considered extracting the list of connected corners. This method basically looks for all the corners, looks in the directions where a "wall" or connector is present and then finds the first corner in that direction. We then keep track of all the discovered pairs. So first, here's a helper function to find "connected" corners

    find_hit <- function(corner, corners, rvec, cvec) {
      best <- NA
      best_dist <- Inf
      for(i in 1:nrow(corners)) {
        rdist <- corners[i, 1] - corner[,1]
        cdist <- corners[i, 2] - corner[,2]
        if(sign(rdist) == sign(rvec) & sign(cdist) == sign(cvec) & 
          ((rvec ==0 | cvec== 0) | (rdist%/%rvec ==cdist %/%cvec))) {
          dist <- rdist + cdist
          if (dist < best_dist) {
            best_dist <- dist
            best <- i
          }
        }
      }
      best
    }
    

    The idea is that given a point and a list of candidate points, we look along a particular "vector" to see of there is a point at the end of that direction and return the first hit. We also have a number helper to make sure the points we are looking at are "in bounds"

    in_bounds <- (function(data) {
      function(r, c) {
        r > 0 & r <= nrow(data) & c > 0 & c <= ncol(data)
      }
    })(data)
    

    Now we find all the corners and loop through them to find the connections

    corners <- which(data=="+", arr.ind = TRUE)
    edges <- list()
    for (i in 1:(nrow(corners)-1)) {
      corner <- corners[i, , drop=FALSE]
      rest <- corners[-(1:i), , drop=FALSE]
      r <- corner[, 1]
      c <- corner[, 2]
      for (dr in c(-1, 0, 1)) {
        for (dc in c(-1, 0, 1)) {
          if (in_bounds(r+dr, c+dc) & (dr != 0| dc != 0) && data[r+dr, c+dc]!=" ") {
            hit <- find_hit(corner, rest, dr, dc)
            if (!is.na(hit)) {
              edges <- c(edges, list(c(i, i+hit)))
            } 
          }
        }
      }
    }
    edges
    

    So edges is a list telling which corners are connected. We can turn this into an igraph object with

    library(igraph)
    dd <- as.data.frame(do.call(rbind, edges))
    dd[] <- lapply(dd, as.character)
    g <- graph_from_data_frame(dd, directed = FALSE)
    vnames <- as.character(1:nrow(corners))
    V(g)[vnames]$row <- corners[,1]
    V(g)[vnames]$col <- corners[,2]
    plot(g)
    

    connected corner graph

    And then you can get the row/column information out with

    as.data.frame(vertex_attr(g))
    

    And to put them in the right shape, you can set the x and y attributes (here I flip the y direction since (0,0) is lower left when plotting but top left in the matrix)

    V(g)[vnames]$y <- -corners[,1]
    V(g)[vnames]$x <- corners[,2]
    
    plot(g)
    

    points in map order