Search code examples
sql-servert-sqlsql-server-2014sqlgeography

Insert buffered GEOGRAPHY Points into target table without duplicates


I would like to create a target table from a source table, which only contains unique/distinct points in the sense that there should be no points within the radius of 1000 meters from each other.

This is a starting point (simplified mockup using temp tables):

IF OBJECT_ID('tempdb..#Source') IS NOT NULL DROP TABLE #Source
IF OBJECT_ID('tempdb..#TargetSeeded') IS NOT NULL DROP TABLE #TargetSeeded
IF OBJECT_ID('tempdb..#TargetEmpty') IS NOT NULL DROP TABLE #TargetEmpty

CREATE TABLE #Source
(
    Id INT IDENTITY(1,1) PRIMARY KEY,
    Point GEOGRAPHY
)

CREATE TABLE #TargetSeeded
(
    Id INT IDENTITY(1,1) PRIMARY KEY,
    Point GEOGRAPHY
)

CREATE TABLE #TargetEmpty
(
    Id INT IDENTITY(1,1) PRIMARY KEY,
    Point GEOGRAPHY
)

DECLARE @Point1 GEOGRAPHY;
DECLARE @Point2 GEOGRAPHY;
DECLARE @Point3 GEOGRAPHY;
DECLARE @Point4 GEOGRAPHY;
DECLARE @PointBufferDistanceInMeters INT;

SET @Point1 = GEOGRAPHY::STPointFromText('POINT(1 52.50)', 4326);
SET @Point2 = GEOGRAPHY::STPointFromText('POINT(1 52.51)', 4326);
SET @Point3 = GEOGRAPHY::STPointFromText('POINT(1 52.52)', 4326);
SET @Point4 = GEOGRAPHY::STPointFromText('POINT(1 52.52)', 4326);
SET @PointBufferDistanceInMeters = 1000;

--SELECT @Point1.STDistance(@Point2);
--SELECT @Point1.STDistance(@Point3);
--SELECT @Point1.STDistance(@Point4);
--SELECT @Point2.STDistance(@Point3);

INSERT INTO #Source
    SELECT @Point1
        UNION ALL 
    SELECT @Point2
        UNION ALL 
    SELECT @Point3
        UNION ALL 
    SELECT @Point4

INSERT INTO #TargetSeeded
    SELECT @Point1
        UNION ALL 
    SELECT @Point2

CREATE SPATIAL INDEX SpatialIndex ON #Source([Point]);
CREATE SPATIAL INDEX SpatialIndex ON #TargetEmpty([Point]);
CREATE SPATIAL INDEX SpatialIndex ON #TargetSeeded([Point]);

-- Identify Ids to be inserted
SELECT 
    Id,
    Point
FROM #Source WHERE Id NOT IN
(
    SELECT 
        So.Id
    FROM #Source AS So
    INNER JOIN #TargetSeeded AS Ta
    ON So.Point.STDistance(Ta.Point) < @PointBufferDistanceInMeters
) 

I can identify candidates to be inserted from the #Source as set based approach in a (IMHO) efficient (?) way. I just do not get my head around on how to remove duplicates in the #Source table (duplicates in the above sense - with a buffer of 1000 meters). So ultimately, I would like id 3 or 4 to be inserted into #TargetSeeded (which one does not matter). Any ideas?

PS:

This is a correlated sub-query attempt which may be a solution:

IF OBJECT_ID('tempdb..#Source') IS NOT NULL DROP TABLE #Source
IF OBJECT_ID('tempdb..#TargetSeeded') IS NOT NULL DROP TABLE #TargetSeeded
IF OBJECT_ID('tempdb..#TargetEmpty') IS NOT NULL DROP TABLE #TargetEmpty

CREATE TABLE #Source
(
    Id INT IDENTITY(1,1) PRIMARY KEY,
    Point GEOGRAPHY
)

CREATE TABLE #TargetSeeded
(
    Id INT IDENTITY(1,1) PRIMARY KEY,
    Point GEOGRAPHY
)

CREATE TABLE #TargetEmpty
(
    Id INT IDENTITY(1,1) PRIMARY KEY,
    Point GEOGRAPHY
)

DECLARE @Point1 GEOGRAPHY;
DECLARE @Point2 GEOGRAPHY;
DECLARE @Point3 GEOGRAPHY;
DECLARE @Point4 GEOGRAPHY;
DECLARE @PointBufferDistanceInMeters INT;

SET @Point1 = GEOGRAPHY::STPointFromText('POINT(1 52.50)', 4326);
SET @Point2 = GEOGRAPHY::STPointFromText('POINT(1 52.51)', 4326);
SET @Point3 = GEOGRAPHY::STPointFromText('POINT(1 52.52)', 4326);
SET @Point4 = GEOGRAPHY::STPointFromText('POINT(1 52.52)', 4326);
SET @PointBufferDistanceInMeters = 1000;

--SELECT @Point1.STDistance(@Point2);
--SELECT @Point1.STDistance(@Point3);
--SELECT @Point1.STDistance(@Point4);
--SELECT @Point2.STDistance(@Point3);

INSERT INTO #Source
    SELECT @Point1
        UNION ALL 
    SELECT @Point2
        UNION ALL 
    SELECT @Point3
        UNION ALL 
    SELECT @Point4

INSERT INTO #TargetSeeded
    SELECT @Point1
        UNION ALL 
    SELECT @Point2

CREATE SPATIAL INDEX SpatialIndex ON #Source([Point]);
CREATE SPATIAL INDEX SpatialIndex ON #TargetEmpty([Point]);
CREATE SPATIAL INDEX SpatialIndex ON #TargetSeeded([Point]);

-- Identify Ids to be inserted
DELETE FROM #Source WHERE Id NOT IN 
(
    SELECT 
        Id
    FROM #Source WHERE Id NOT IN
    (
        SELECT 
            So.Id
        FROM #Source AS So
        INNER JOIN #TargetSeeded AS Ta
        ON So.Point.STDistance(Ta.Point) < @PointBufferDistanceInMeters
    ) 
)

SELECT 
    *
FROM #Source o
WHERE o.Id IN
(
    SELECT MAX(i.Id)
    FROM #Source i
    WHERE 
        i.Point.STDistance(o.Point) < @PointBufferDistanceInMeters
)

Solution

  • Does this help?

    -- Sample data.
    declare @Source as Table ( Id Int Identity Primary Key, Point Geography );
    insert into @Source ( Point ) values
      ( Geography::STPointFromText( 'Point( 1 52.50 )', 4326 ) ),
      ( Geography::STPointFromText( 'Point( 1 52.51 )', 4326 ) ),
      ( Geography::STPointFromText( 'Point( 1 52.52 )', 4326 ) ),
      ( Geography::STPointFromText( 'Point( 1 52.52 )', 4326 ) );
    select *, Point.ToString() as DecodedPoint from @Source;
    
    declare @Target as Table ( Id Int Identity Primary Key, Point Geography );
    insert into @Target ( Point ) values
      ( Geography::STPointFromText( 'Point( 1 52.50 )', 4326 ) ),
      ( Geography::STPointFromText( 'Point( 1 52.51 )', 4326 ) );
    select *, Point.ToString() as DecodedPoint from @Target;
    
    declare @PointBufferDistanceInMeters as Int = 1000;
    
    -- Merge the data.    
    insert into @Target
      select Point
        from @Source as S
        where
          -- Remove rows that conflict with another   Source   row.
          not exists ( select 42 from @Source where S.Point.STDistance( Point ) < @PointBufferDistanceInMeters and S.Id < Id ) and
          -- Remove rows that conflict with an existing   Target   row.
          not exists ( select 42 from @Target where S.Point.STDistance( Point ) < @PointBufferDistanceInMeters )
    select *, Point.ToString() as DecodedPoint from @Target;