Search code examples
algorithmscalacoordinatesdistancepoints

Count the number of points closer than a given distance


I'm developing a project that given a set of coordinates (lat and longitude) needs to find all the points that are closer than a given distance (in Km).

I have working implementation that iterates over each point and for each point iterates over all the other points. This is O(n^2). I'd like to know what approach could I follow to improve this, having in mind that I don't want the closest point but the ones that are closer than x Km (also I'd like to be able to do the opposite, finding all the points that are further away than x Km).

If you could provide some ideas and algorithms it would be nice. Also code examples would be nice, specially in Scala (I'm developing this project Scala).


Solution

  • Answering myself a few months late. Well I ended up using using KD-Tree to implement this. I based myself on this Java implementation: http://people.cs.vt.edu/~shaffer/Book/JAVA/progs/KDtree/

    However I made some modifications to it (including porting it to Scala) and adapting it to my specific problem around Geocaches. The resulting code was the following:

    /*
     * Based on
     * http://people.cs.vt.edu/~shaffer/Book/JAVA/progs/KDtree/
     * 
     * That code is made available as part of this textbook on Data Structures:
     * http://people.cs.vt.edu/~shaffer/Book/
     */
    //A node for a KDTree
    class KDNode[E] {
      private var key_value : Array[Double] = null   //key for this node
      def key = key_value                     
      def key_=(k : Array[Double]) { key_value = k } 
    
      private var element_value: E = _               //Element for this node
      def element = element_value
      def element_=(e : E) {element_value = e}
    
      private var left_value : KDNode[E] = null      //Pointer to left child
      def left = left_value
      def left_=(l : KDNode[E]) {left_value = l}
    
      private var right_value : KDNode[E] = null     //Pointer to right child
      def right = right_value
      def right_=(r : KDNode[E]) {right_value = r}
      /** Constructors */
      def this(k : Array[Double], value : E) = 
      {
        this()
        key = k
        element = value
      }
      def this(k : Array[Double], value : E, l : KDNode[E], r : KDNode[E]) = 
      {
        this(k,value)
        left = l
        right = r    
      }
      //Checks if it's a leaf
      def isLeaf : Boolean = 
      {
        return (left == null) && (right == null)
      }
    }
    
    //Trait for a basic Dictionary that will be used as basis for our KDTree
    trait Dictionary[K,V] {
      def size: Int
      def insert(key: K, value: V)
      def remove(key: K): V
      def find(key: K): V
      def clear
      def isEmpty: Boolean
    }
    
    /**
     * A extended trait that defines that the key of the dictionary is an array of
     * doubles and defines the need of a method called regionSearchGeo (very
     * important for All Years 5 & 6 stats)
     */
    trait GeoDictionary[E] extends Dictionary[Array[Double], E] {
      def regionSearchGeo(k: Array[Double], radius: Double) : List[E]
    }
    
    /**
     * Our implementation of a KDTree. It's based on the code provided in the
     * reference above, but had a few key areas modified for geographic searching.
     * For example, this KDtree is a 3D Tree. The keys are Latitude, Longitude and
     * the Year of the cache (that was also included to speed thins even further)
     */
    class K3DGeoTree[E] extends GeoDictionary[E]{
    
      private var root : KDNode[E] = null
      private val D : Int = 3 // Only supporting 2D points in this implementation
      private var nodecount : Int = 0 // Number of nodes in the KD tree
    
      /**
       * Implementing everything that is required by the Dictionary trait
       * Some private auxiliary methods are also present
       */
      def clear() = { root = null }
      def isEmpty() : Boolean = { root == null }
      def size() : Int = { return nodecount }
    
      def insert(pt : Array[Double], value : E) = {
        root = inserthelp(root, pt, value, 0)
        nodecount=nodecount+1
      }
      def inserthelp(rt :  KDNode[E], key : Array[Double], value : E, level : Int)
                                     : KDNode[E] = {
        if (rt == null) return new KDNode[E](key, value)
        val rtkey : Array[Double] = rt.key
        if (rtkey(level) > key(level))
            rt.left = inserthelp(rt.left, key, value, (level+1)%D)
        else
            rt.right = inserthelp(rt.right, key, value, (level+1)%D)
        return rt
      }
    
      private def findmin(rt : KDNode[E], descrim : Int, level : Int): KDNode[E]= {
      var temp1 : KDNode[E] = null
      var temp2 : KDNode[E] = null
      var key1 : Array[Double] = null
      var key2 : Array[Double] = null
      if (rt == null) return null
      temp1 = findmin(rt.left, descrim, (level+1)%D)
      if (temp1 != null) key1 = temp1.key
      if (descrim != level) {
        temp2 = findmin(rt.right, descrim, (level+1)%D)
        if (temp2 != null) key2 = temp2.key
        if ((temp1 == null) || ((temp2 != null) && (key1(descrim) > key2(descrim))))
        temp1 = temp2
        key1 = key2
      } // Now, temp1 has the smaller value
      var rtkey : Array[Double] = rt.key
      if ((temp1 == null) || (key1(descrim) > rtkey(descrim)))
        return rt
      else
        return temp1
    }
    
      def find(key : Array[Double]) : E = { return findhelp(root, key, 0) }
    
      private def findhelp(rt : KDNode[E], key : Array[Double], level : Int) : E ={
      if (rt == null) return null.asInstanceOf[E]
      val it : E = rt.element
      val itkey : Array[Double]= rt.key
      if ((itkey(0) == key(0)) && (itkey(1) == key(1)))
        return rt.element
      if (itkey(level) > key(level))
        return findhelp(rt.left, key, (level+1)%D)
      else
        return findhelp(rt.right, key, (level+1)%D)
    }
    
      def remove(key : Array[Double]) : E = {
        val temp : E = findhelp(root, key, 0)   // First find it
        if (temp != null) {
                root = removehelp(root, key, 0) // Now remove it
                nodecount=nodecount-1
        }
        return temp
      }
    
      private def removehelp(rt : KDNode[E], key : Array[Double], level : Int)
                                                 : KDNode[E] = {
          if (rt == null) return null
          val rtkey : Array[Double] = rt.key
          if (key(level) < rtkey(level))
            rt.left = removehelp(rt.left, key, (level+1)%D)
          else if (key(level) > rtkey(level))
            rt.right = removehelp(rt.right, key, (level+1)%D)
          else {  // Found it
          if (rt.right == null)
            if (rt.left == null) // Just drop element
             return null
          else { // Switch subtree to right
                rt.right = rt.left
                rt.left = null
          }
          val temp : KDNode[E] = findmin(rt.right, level, (level+1)%D)
          rt.right = removehelp(rt.right, temp.key, (level+1)%D)
          rt.element = temp.element
        }
        return rt
      }
    
      /**
       * Implementing the GeoDictionary trait
       */
      def regionSearchGeo(point: Array[Double], radius: Double) : List[E] = 
      {
        val pointGeo : GeoLocation = GeoLocation.fromDegrees(point(0), point(1))
        /**
         * Calculates a bounding rectangle that contains the circle with the given
         * radius. This will be explained later in the corresponding class
         */
        val boundingRect = pointGeo.boundingCoordinates(radius)  
        //Return the caches found
        return rsGeoHelp(root, point, radius, boundingRect, 0)
      }
      /**
       * Auxiliary region search function that does all the heavy work
       */
      private def rsGeoHelp(rt : KDNode[E], point : Array[Double], radius : Double,
                                                    boundingRect : Tuple2[GeoLocation,GeoLocation],
                                                    lev : Int): List[E] = {
      if (rt == null) return Nil
      val rtkey : Array[Double] = rt.key
      var found : List[E] = Nil
     //Checks if the current node is in the desired radius (and also the year)
      if (InCircleGeo(point, radius, rtkey))
              found = List(rt.element)
     //First Dimension is latitude
      if(lev % D == 0){
            if (rtkey(lev) >= boundingRect._1.degLat)
            found = found:::rsGeoHelp(rt.left, point, radius, boundingRect, (lev+1)%D)
            if (rtkey(lev) <= boundingRect._2.degLat)
            found = found:::rsGeoHelp(rt.right, point, radius, boundingRect, (lev+1)%D)
      }
     //Second Dimension is Longitude
      else if(lev % D == 1){
            if (rtkey(lev) >= boundingRect._1.degLon)
            found = found:::rsGeoHelp(rt.left, point, radius, boundingRect, (lev+1)%D)
            if (rtkey(lev) <= boundingRect._2.degLon)
        found = found:::rsGeoHelp(rt.right, point, radius, boundingRect, (lev+1)%D)
      }
     //Third and last dimension is the year
      else{
        found = found:::rsGeoHelp(rt.left, point, radius, boundingRect, (lev+1)%D)
            if (rtkey(lev) <= point(lev))
            found = found:::rsGeoHelp(rt.right, point, radius, boundingRect, (lev+1)%D)
      }
      //Return the found nodes (in our case it will be caches)
      return found
      }
      private def InCircleGeo(point : Array[Double], radius : Double,
                                                      coord : Array[Double]) : Boolean = {
      //Creates a GeoLocation object for each point
      val pointGeo : GeoLocation = GeoLocation.fromDegrees(point(0), point(1))
      val coordGeo : GeoLocation = GeoLocation.fromDegrees(coord(0), coord(1))
      /**
       * If the year is smaller than the query point and the distance is within
       * radius return true. Else it's false.
       */
      return (coord(0) != point(0) && coord(1) != point(1) && coord(2) <= point(2)
             && pointGeo.distanceTo(coordGeo) < radius)
      }
    }
    
    /**
     * This class encapsulates a series of utility methods to deal with geographic
     * coordinates. It was based on the information in the link below that gives
     * a very good insight about how to do math with geographic coordinates and
     * also provides some Java samples that we used as an inspiration for this
     * class.
     * Link: http://janmatuschek.de/LatitudeLongitudeBoundingCoordinates
     */
    //Companion object of Class GeoLocation to define static methods and variables
    object GeoLocation {
      //Min maxs in terms of Latitude and Longitude accross the globe
      private val MIN_LAT : Double = math.toRadians(-90)  // -PI/2
      private val MAX_LAT : Double = math.toRadians(90)   //  PI/2
      private val MIN_LON : Double = math.toRadians(-180) // -PI
      private val MAX_LON : Double = math.toRadians(180)  //  PI
      /**
       * Earth radius. This value is the most used but there are others that may
       * give slightly different results.
       */
      private val RADIUS : Double = 6372.8
    
      /**
       * A factory method that creates a GeoLocation object from given latitude and
       * longitude in degrees
       */
      def fromDegrees(latitude : Double, longitude : Double) : GeoLocation = {
        val result : GeoLocation = new GeoLocation()
            result.radLat = math.toRadians(latitude)
            result.radLon = math.toRadians(longitude)
            result.degLat = latitude
            result.degLon = longitude
            result.checkBounds
            return result
      }
    
      /**
       * A factory method that creates a GeoLocation object from given latitude and
       * longitude in radians
       */
      def fromRadians(latitude : Double, longitude : Double) : GeoLocation = {
            val result : GeoLocation = new GeoLocation()
            result.radLat = latitude
            result.radLon = longitude
            result.degLat = math.toDegrees(latitude)
            result.degLon = math.toDegrees(longitude)
            result.checkBounds
            return result
      }
    }
    /**
     * The GeoLocation class itself. The constructor is private use the factory
     * methods above.
     */
    class GeoLocation private{
    
      /**
       * Getters and Setters implemented as properties with syntactic sugar
       * This properties  contain the latitude and longitude in degrees and radians
       */
      private var radLat_value : Double = _
      def radLat = radLat_value                    
      private def radLat_=(k : Double) { radLat_value = k } 
    
      private var radLon_value : Double = _
      def radLon = radLon_value                    
      private def radLon_=(k : Double) { radLon_value = k } 
    
      private var degLat_value : Double = _
      def degLat = degLat_value                    
      private def degLat_=(k : Double) { degLat_value = k } 
    
      private var degLon_value : Double = _
      def degLon = degLon_value                    
      private def degLon_=(k : Double) { degLon_value = k } 
    
      /**
       * Check if the vales are valid considering the MIN and MAX for latitude and
       * longitude.
       */
      private def checkBounds = {
            if (radLat < GeoLocation.MIN_LAT || radLat > GeoLocation.MAX_LAT ||
                    radLon < GeoLocation.MIN_LON || radLon > GeoLocation.MAX_LON)
                throw new IllegalArgumentException()
      }
    
      /**
       * Function to calculate the distance between this GeoLocation and the given
       * GeoLocation.
       * 
       * Check the reference above and
       * http://en.wikipedia.org/wiki/Haversine_formula
       * for more information.
       */
      def distanceTo(location : GeoLocation) : Double = {
            return math.acos(math.sin(radLat) * math.sin(location.radLat) +
                       math.cos(radLat) * math.cos(location.radLat) *
                       math.cos(radLon - location.radLon)) * GeoLocation.RADIUS
      }
    
      /**
       * This method is very important for the search made in the K3DTree.
       * It allows us to make a bouding rectangle with the given distance/radius
       * that is geometrically correct. Check the reference above to learn more
       * about the math involved.
       */
      def boundingCoordinates(distance : Double)
            : Tuple2[GeoLocation, GeoLocation] = {
        if (distance < 0d) throw new IllegalArgumentException()
            // Angular distance in radians on a great circle
            val radDist : Double = distance / GeoLocation.RADIUS
    
            //Initialize local variables to check for poles
            var minLat : Double = radLat - radDist
            var maxLat : Double = radLat + radDist
    
            var minLon : Double = 0
            var maxLon : Double = 0
    
            //Normal case
            if (minLat > GeoLocation.MIN_LAT && maxLat < GeoLocation.MAX_LAT) {
                    val deltaLon : Double = math.asin(math.sin(radDist) / math.cos(radLat))
                            minLon = radLon - deltaLon
                    if (minLon < GeoLocation.MIN_LON) minLon += 2d * math.Pi
                            maxLon = radLon + deltaLon
                    if (maxLon > GeoLocation.MAX_LON) maxLon -= 2d * math.Pi
            }
        //Special case in which a pole is within the distance
            else{
                            minLat = math.max(minLat, GeoLocation.MIN_LAT)
                            maxLat = math.min(maxLat, GeoLocation.MAX_LAT)
                            minLon = GeoLocation.MIN_LON
                            maxLon = GeoLocation.MAX_LON
                    }       
        /**
         * Each of the bounding points (one in the south-west, bottom-left,
         * and other in the north-east, top-right)
         */
        val swPoint : GeoLocation = GeoLocation.fromRadians(minLat, minLon)
        val nePoint : GeoLocation = GeoLocation.fromRadians(maxLat, maxLon) 
        //Return the tuple with the two points
        return (swPoint, nePoint)
      }
    }
    

    The whole code is documented so I hope this helps someone with a similar problem. In this specific problem I had to deal with years besides latitude and longitude, so I added an extra dimension. But for a more general geographic problem it's even easier to do it with only two dimensions (one for latitude and one for longitude).