Search code examples
c++pointsclosest

Closest pair of points O(nlogn) algorithm - problem with some data in c++ implementation


I have question about divide and conquer algorithm to find closest points. I checked C++ implementation on this page https://www.geeksforgeeks.org/closest-pair-of-points-onlogn-implementation/ But there is a problem with this code. It works fine in most cases, but for some data, this implementation returns other result, than brute-force method. For example, lets take ten points (x, y):

(795 981)
(1905 4574)
(8891 665)
(6370 1396)
(93 8603)
(302 7099)
(326 5318)
(4493 3977)
(429 8687)
(9198 1558)

For this data, O(n log n) algorithm returns 944.298 instead of 346.341 given by brute force. Why is this happening?

This is exactly geeksforgeeks implementation with my example data:

#include <iostream>
#include <float.h>
#include <stdlib.h>
#include <math.h>
using namespace std;

struct Point
{
    int x, y;
};

int compareX(const void* a, const void* b)
{
    Point *p1 = (Point *)a,  *p2 = (Point *)b;
    return (p1->x - p2->x);
}

int compareY(const void* a, const void* b)
{
    Point *p1 = (Point *)a,   *p2 = (Point *)b;
    return (p1->y - p2->y);
}


float dist(Point p1, Point p2)
{
    return sqrt( (p1.x - p2.x)*(p1.x - p2.x) +
                 (p1.y - p2.y)*(p1.y - p2.y)
    );
}

float bruteForce(Point P[], int n)
{
    float min = FLT_MAX;
    for (int i = 0; i < n; ++i)
        for (int j = i+1; j < n; ++j)
            if (dist(P[i], P[j]) < min)
                min = dist(P[i], P[j]);
    return min;
}

float min(float x, float y)
{
    return (x < y)? x : y;
}


float stripClosest(Point strip[], int size, float d)
{
    float min = d;  // Initialize the minimum distance as d 

    for (int i = 0; i < size; ++i)
        for (int j = i+1; j < size && (strip[j].y - strip[i].y) < min; ++j)
            if (dist(strip[i],strip[j]) < min)
                min = dist(strip[i], strip[j]);

    return min;
}

float closestUtil(Point Px[], Point Py[], int n)
{
    // If there are 2 or 3 points, then use brute force 
    if (n <= 3)
        return bruteForce(Px, n);

    // Find the middle point 
    int mid = n/2;
    Point midPoint = Px[mid];


    Point Pyl[mid+1];   // y sorted points on left of vertical line 
    Point Pyr[n-mid-1];  // y sorted points on right of vertical line 
    int li = 0, ri = 0;  // indexes of left and right subarrays 
    for (int i = 0; i < n; i++)
    {
        if (Py[i].x <= midPoint.x)
            Pyl[li++] = Py[i];
        else
            Pyr[ri++] = Py[i];
    }

    float dl = closestUtil(Px, Pyl, mid);
    float dr = closestUtil(Px + mid, Pyr, n-mid);

    // Find the smaller of two distances 
    float d = min(dl, dr);

    Point strip[n];
    int j = 0;
    for (int i = 0; i < n; i++)
        if (abs(Py[i].x - midPoint.x) < d)
            strip[j] = Py[i], j++;

    return min(d, stripClosest(strip, j, d) );
}

float closest(Point P[], int n)
{
    Point Px[n];
    Point Py[n];
    for (int i = 0; i < n; i++)
    {
        Px[i] = P[i];
        Py[i] = P[i];
    }

    qsort(Px, n, sizeof(Point), compareX);
    qsort(Py, n, sizeof(Point), compareY);

    // Use recursive function closestUtil() to find the smallest distance 
    return closestUtil(Px, Py, n);
}

// Driver program to test above functions 
int main()
{
    Point P[] = {{795, 981}, {1905, 4574}, {8891, 665}, {6370, 1396}, {93, 8603}, {302, 7099},
                 {326, 5318}, {4493, 3977}, {429, 8687}, {9198, 1558}};
    int n = sizeof(P) / sizeof(P[0]);
    cout << closest(P, n) << std::endl;
    cout << bruteForce(P, n) << std::endl;
    return 0;
} 

Has anyone idea what is wrong here? I have been trying fix it for few hours, but I don't really understand why this problem happens.


Solution

  • Since Pyl and Pyr have the sizes of mid+1 and n-mid-1 respectively, the following two lines

    float dl = closestUtil(Px,     Pyl, mid  );
    float dr = closestUtil(Px+mid, Pyr, n-mid);
    

    should be rewritten as follows:

    float dl = closestUtil(Px,       Pyl, mid+1  );
    float dr = closestUtil(Px+mid+1, Pyr, n-mid-1);
    

    In addition, as commented in the source code of this site, in the above code it is assumed that all x coordinates are distinct. For instance, if all x coordinates are same, li is incremented from 0 to n and unexpected behavior occurs at Pyl[li++] = Py[i]; when li >= mid+1.


    BTW, VLA (Variable Vength Arrays) does not exist at all in the C++ specification. Since arrays Px, Py, Pyl and Pyr are created on stack with automatic storage duration, their sizes should be determined at compile-time. But some C++ compilers including GNU compiler support VLA as a compiler extension and allows declaring C-style arrays with a dynamic length. (How the memory is allocated for the VLA is implementation specific.) But C++ provides dynamic array functionality by std::vector which might make our code more readable, portable and robust.