Search code examples
rsortinggeolocationgeospatialosrm

How do I sort points based on a time matrix in R?


I have a time matrix in R which I computed with the help of osrm package. I want to sort the points based on the neighboring points. The sample data:

name <- LETTERS[1:10]
lat  <- c(22.57, 22.69, 22.72, 22.50, 22.66, 22.19, 22.60, 22.27, 22.31, 22.15)
lon  <- c(88.69, 88.84, 88.77, 88.85, 88.63, 88.91, 88.54, 88.62, 88.78, 88.66)
demand <- c(30, 70, 75, 100, 45, 60, 135, 65, 55, 50)

df<-data.frame(name, lon, lat, demand)

computing the time matrix

library(osrm)


time<- osrmTable(df[,c('name', 'lon', 'lat')])
time_matrix<- time$durations

Now, I want a data frame something like this, based on the time matrix above.

From To Time Demand
A    G  30.1 135
G    E  33.9 45
E    C  30.3 75

I can find the nearest point but I need to check whether the nearest point is included in the From column. If it has been then the 2nd nearest point will be used and so on. Like G's nearest point is A here, but as it has been included already, so it will be E (the 2nd nearest point). Similarly, it will go on until all the points have been included in the table.

How do I do it?


Solution

  • The solution depends on the starting point (that we can assume to be the first point in the data) and how the following point is selected.

    Continuous path

    The following point is the nearest neighbor:

    diag(time_matrix) <- NA
    
    nearestpoints <- data.frame(matrix(ncol = 4, nrow = 0))
    colnames(nearestpoints) <- c("From", "To", "Time", "Demand")
    
    inputrowindex=1
    outputrowindex=1
    visitedpoints <- c(rownames(time_matrix)[1]) #The visited points are the 'To' points
    
    while(length(setdiff(rownames(time_matrix), visitedpoints)) > 0){
      nearest <- which.min(time_matrix[inputrowindex,])
      if(length(nearest)==0) break
      
      nearestpoints[outputrowindex, 1] <- rownames(time_matrix)[inputrowindex]
      nearestpoints[outputrowindex, 2] <- names(nearest)
      nearestpoints[outputrowindex, 3] <- time_matrix[inputrowindex, nearest]
      nearestpoints[outputrowindex, 4] <- df[nearest, 4]
      
      time_matrix[inputrowindex,] <- NA
      time_matrix[,inputrowindex] <- NA
      
      visitedpoints <- c(visitedpoints, names(nearest))
      
      inputrowindex = as.numeric(nearest) #Next point is the nearest
      outputrowindex = outputrowindex + 1
    }
    

    Which gives:

    head(nearestpoints)
    #  From To Time Demand
    #1    A  G 30.1    135
    #2    G  E 33.7     45
    #3    E  C 30.3     75
    #4    C  B 11.4     70
    #5    B  D 35.2    100
    #6    D  I 56.5     55
    

    Data ordered path

    The following point is the next one in the data:

    diag(time_matrix) <- NA
    
    nearestpoints <- data.frame(matrix(ncol = 4, nrow = 0))
    colnames(nearestpoints) <- c("From", "To", "Time", "Demand")
    
    inputrowindex=1
    outputrowindex=1
    
    visitedpoints <- c() #The visited points are the 'From' points
    
    while(length(setdiff(rownames(time_matrix), visitedpoints)) > 0){
      nearest <- which.min(time_matrix[inputrowindex,])
      if(length(nearest)==0) break
      
      nearestpoints[outputrowindex, 1] <- rownames(time_matrix)[inputrowindex]
      nearestpoints[outputrowindex, 2] <- names(nearest)
      nearestpoints[outputrowindex, 3] <- time_matrix[inputrowindex, nearest]
      nearestpoints[outputrowindex, 4] <- df[nearest, 4]
      
      time_matrix[inputrowindex,] <- NA
      time_matrix[,inputrowindex] <- NA
      
      visitedpoints <- c(visitedpoints,  rownames(time_matrix)[inputrowindex])
      
      inputrowindex = inputrowindex + 1 #Next point in the data
      outputrowindex = outputrowindex + 1
    }
    

    Which gives:

    head(nearestpoints)
    #  From To  Time Demand
    #1    A  G  30.1    135
    #2    B  C  11.4     75
    #3    C  E  30.3     45
    #4    D  I  56.5     55
    #5    E  G  33.9    135
    #6    F  H 118.1     65
    

    Raw data:

    name <- LETTERS[1:10]
    lat  <- c(22.57, 22.69, 22.72, 22.50, 22.66, 22.19, 22.60, 22.27, 22.31, 22.15)
    lon  <- c(88.69, 88.84, 88.77, 88.85, 88.63, 88.91, 88.54, 88.62, 88.78, 88.66)
    demand <- c(30, 70, 75, 100, 45, 60, 135, 65, 55, 50)
    df <- data.frame(name, lat, lon, demand)
    
    library(osrm)
    time <- osrmTable(df[,c('name', 'lon', 'lat')])
    time_matrix <- time$durations