I have a table with rowID, longitude, latitude, businessName, url, caption. This might look like:
rowID | long | lat | businessName | url | caption
1 20 -20 Pizza Hut yum.com null
How do I delete all of the duplicates, but only keep the one that has a URL (first priority), or keep the one that has a caption if the other doesn't have a URL (second priority) and delete the rest?
Here's my looping technique. This will probably get voted down for not being mainstream - and I'm cool with that.
DECLARE @LoopVar int
DECLARE
@long int,
@lat int,
@businessname varchar(30),
@winner int
SET @LoopVar = (SELECT MIN(rowID) FROM Locations)
WHILE @LoopVar is not null
BEGIN
--initialize the variables.
SELECT
@long = null,
@lat = null,
@businessname = null,
@winner = null
-- load data from the known good row.
SELECT
@long = long,
@lat = lat,
@businessname = businessname
FROM Locations
WHERE rowID = @LoopVar
--find the winning row with that data
SELECT top 1 @Winner = rowID
FROM Locations
WHERE @long = long
AND @lat = lat
AND @businessname = businessname
ORDER BY
CASE WHEN URL is not null THEN 1 ELSE 2 END,
CASE WHEN Caption is not null THEN 1 ELSE 2 END,
RowId
--delete any losers.
DELETE FROM Locations
WHERE @long = long
AND @lat = lat
AND @businessname = businessname
AND @winner != rowID
-- prep the next loop value.
SET @LoopVar = (SELECT MIN(rowID) FROM Locations WHERE @LoopVar < rowID)
END