I would like to create a target table from a source table, which only contains unique/distinct points in the sense that there should be no points within the radius of 1000 meters from each other.
This is a starting point (simplified mockup using temp tables):
IF OBJECT_ID('tempdb..#Source') IS NOT NULL DROP TABLE #Source
IF OBJECT_ID('tempdb..#TargetSeeded') IS NOT NULL DROP TABLE #TargetSeeded
IF OBJECT_ID('tempdb..#TargetEmpty') IS NOT NULL DROP TABLE #TargetEmpty
CREATE TABLE #Source
(
Id INT IDENTITY(1,1) PRIMARY KEY,
Point GEOGRAPHY
)
CREATE TABLE #TargetSeeded
(
Id INT IDENTITY(1,1) PRIMARY KEY,
Point GEOGRAPHY
)
CREATE TABLE #TargetEmpty
(
Id INT IDENTITY(1,1) PRIMARY KEY,
Point GEOGRAPHY
)
DECLARE @Point1 GEOGRAPHY;
DECLARE @Point2 GEOGRAPHY;
DECLARE @Point3 GEOGRAPHY;
DECLARE @Point4 GEOGRAPHY;
DECLARE @PointBufferDistanceInMeters INT;
SET @Point1 = GEOGRAPHY::STPointFromText('POINT(1 52.50)', 4326);
SET @Point2 = GEOGRAPHY::STPointFromText('POINT(1 52.51)', 4326);
SET @Point3 = GEOGRAPHY::STPointFromText('POINT(1 52.52)', 4326);
SET @Point4 = GEOGRAPHY::STPointFromText('POINT(1 52.52)', 4326);
SET @PointBufferDistanceInMeters = 1000;
--SELECT @Point1.STDistance(@Point2);
--SELECT @Point1.STDistance(@Point3);
--SELECT @Point1.STDistance(@Point4);
--SELECT @Point2.STDistance(@Point3);
INSERT INTO #Source
SELECT @Point1
UNION ALL
SELECT @Point2
UNION ALL
SELECT @Point3
UNION ALL
SELECT @Point4
INSERT INTO #TargetSeeded
SELECT @Point1
UNION ALL
SELECT @Point2
CREATE SPATIAL INDEX SpatialIndex ON #Source([Point]);
CREATE SPATIAL INDEX SpatialIndex ON #TargetEmpty([Point]);
CREATE SPATIAL INDEX SpatialIndex ON #TargetSeeded([Point]);
-- Identify Ids to be inserted
SELECT
Id,
Point
FROM #Source WHERE Id NOT IN
(
SELECT
So.Id
FROM #Source AS So
INNER JOIN #TargetSeeded AS Ta
ON So.Point.STDistance(Ta.Point) < @PointBufferDistanceInMeters
)
I can identify candidates to be inserted from the #Source as set based approach in a (IMHO) efficient (?) way. I just do not get my head around on how to remove duplicates in the #Source table (duplicates in the above sense - with a buffer of 1000 meters). So ultimately, I would like id 3 or 4 to be inserted into #TargetSeeded (which one does not matter). Any ideas?
PS:
This is a correlated sub-query attempt which may be a solution:
IF OBJECT_ID('tempdb..#Source') IS NOT NULL DROP TABLE #Source
IF OBJECT_ID('tempdb..#TargetSeeded') IS NOT NULL DROP TABLE #TargetSeeded
IF OBJECT_ID('tempdb..#TargetEmpty') IS NOT NULL DROP TABLE #TargetEmpty
CREATE TABLE #Source
(
Id INT IDENTITY(1,1) PRIMARY KEY,
Point GEOGRAPHY
)
CREATE TABLE #TargetSeeded
(
Id INT IDENTITY(1,1) PRIMARY KEY,
Point GEOGRAPHY
)
CREATE TABLE #TargetEmpty
(
Id INT IDENTITY(1,1) PRIMARY KEY,
Point GEOGRAPHY
)
DECLARE @Point1 GEOGRAPHY;
DECLARE @Point2 GEOGRAPHY;
DECLARE @Point3 GEOGRAPHY;
DECLARE @Point4 GEOGRAPHY;
DECLARE @PointBufferDistanceInMeters INT;
SET @Point1 = GEOGRAPHY::STPointFromText('POINT(1 52.50)', 4326);
SET @Point2 = GEOGRAPHY::STPointFromText('POINT(1 52.51)', 4326);
SET @Point3 = GEOGRAPHY::STPointFromText('POINT(1 52.52)', 4326);
SET @Point4 = GEOGRAPHY::STPointFromText('POINT(1 52.52)', 4326);
SET @PointBufferDistanceInMeters = 1000;
--SELECT @Point1.STDistance(@Point2);
--SELECT @Point1.STDistance(@Point3);
--SELECT @Point1.STDistance(@Point4);
--SELECT @Point2.STDistance(@Point3);
INSERT INTO #Source
SELECT @Point1
UNION ALL
SELECT @Point2
UNION ALL
SELECT @Point3
UNION ALL
SELECT @Point4
INSERT INTO #TargetSeeded
SELECT @Point1
UNION ALL
SELECT @Point2
CREATE SPATIAL INDEX SpatialIndex ON #Source([Point]);
CREATE SPATIAL INDEX SpatialIndex ON #TargetEmpty([Point]);
CREATE SPATIAL INDEX SpatialIndex ON #TargetSeeded([Point]);
-- Identify Ids to be inserted
DELETE FROM #Source WHERE Id NOT IN
(
SELECT
Id
FROM #Source WHERE Id NOT IN
(
SELECT
So.Id
FROM #Source AS So
INNER JOIN #TargetSeeded AS Ta
ON So.Point.STDistance(Ta.Point) < @PointBufferDistanceInMeters
)
)
SELECT
*
FROM #Source o
WHERE o.Id IN
(
SELECT MAX(i.Id)
FROM #Source i
WHERE
i.Point.STDistance(o.Point) < @PointBufferDistanceInMeters
)
Does this help?
-- Sample data.
declare @Source as Table ( Id Int Identity Primary Key, Point Geography );
insert into @Source ( Point ) values
( Geography::STPointFromText( 'Point( 1 52.50 )', 4326 ) ),
( Geography::STPointFromText( 'Point( 1 52.51 )', 4326 ) ),
( Geography::STPointFromText( 'Point( 1 52.52 )', 4326 ) ),
( Geography::STPointFromText( 'Point( 1 52.52 )', 4326 ) );
select *, Point.ToString() as DecodedPoint from @Source;
declare @Target as Table ( Id Int Identity Primary Key, Point Geography );
insert into @Target ( Point ) values
( Geography::STPointFromText( 'Point( 1 52.50 )', 4326 ) ),
( Geography::STPointFromText( 'Point( 1 52.51 )', 4326 ) );
select *, Point.ToString() as DecodedPoint from @Target;
declare @PointBufferDistanceInMeters as Int = 1000;
-- Merge the data.
insert into @Target
select Point
from @Source as S
where
-- Remove rows that conflict with another Source row.
not exists ( select 42 from @Source where S.Point.STDistance( Point ) < @PointBufferDistanceInMeters and S.Id < Id ) and
-- Remove rows that conflict with an existing Target row.
not exists ( select 42 from @Target where S.Point.STDistance( Point ) < @PointBufferDistanceInMeters )
select *, Point.ToString() as DecodedPoint from @Target;