Search code examples
sqlsql-servercountsubqueryleft-join

MSSQL: use DISTINCT, TOP selection without a GROUP BY clause


I want to use DISTINCT and TOP selection without a GROUP BY clause.

I want to select rows from [locations] and order the results by the number of rows are associated with each location in [location_photos]

I now have this (not even using TOP):

SELECT distinct(l.id),count(lp.locationid) OVER(partition by l.id) AS photos
,ISNULL(lp.locpath,'') as thumb
,l.id,l.title
FROM locations l
LEFT JOIN location_photos lp on lp.locationid=l.id
ORDER BY photos desc

This however returns a row for each row in [location_photos] (56,000), where this should be a potential maximum of the number of rows in [locations] (12,000).

I already checked here and here.

How can I select the distinct number of rows using DISTINCT and TOP?

DDL and data

CREATE TABLE [dbo].[locations](
    [id] [int] NOT NULL,
    [title] [nvarchar](500) NOT NULL,
    [createdate] [datetime] NULL,
 CONSTRAINT [PK_locs] PRIMARY KEY NONCLUSTERED 
(
    [id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO

ALTER TABLE [dbo].[locations] ADD  CONSTRAINT [DF_homes_createdate]  DEFAULT (getdate()) FOR [createdate]
GO

CREATE TABLE [dbo].[location_photos](
    [id] [int] IDENTITY(1,1) NOT NULL,
    [locationid] [int] NOT NULL,
    [locpath] [nvarchar](250) NOT NULL,
 CONSTRAINT [PK_location_photos] PRIMARY KEY NONCLUSTERED 
(
    [id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO

ALTER TABLE [dbo].[location_photos]  WITH CHECK ADD  CONSTRAINT [FK_locs_photos_homes] FOREIGN KEY([locationid])
REFERENCES [dbo].[locations] ([id])
ON DELETE CASCADE
GO

ALTER TABLE [dbo].[location_photos] CHECK CONSTRAINT [FK_locs_photos_homes]
GO



INSERT INTO [locations](id,title) VALUES (1,'new york')
INSERT INTO [locations](id,title) VALUES (2,'boston')
INSERT INTO [locations](id,title) VALUES (3,'chicago')
INSERT INTO [locations](id,title) VALUES (4,'los angeles')


INSERT INTO [location_photos](locationid,locpath) VALUES (1,'nyc1')
INSERT INTO [location_photos](locationid,locpath) VALUES (1,'nyc2')
INSERT INTO [location_photos](locationid,locpath) VALUES (1,'nyc3')
INSERT INTO [location_photos](locationid,locpath) VALUES (1,'nyc4')
INSERT INTO [location_photos](locationid,locpath) VALUES (1,'nyc5')

INSERT INTO [location_photos](locationid,locpath) VALUES (2,'boston1')
INSERT INTO [location_photos](locationid,locpath) VALUES (2,'boston2')
INSERT INTO [location_photos](locationid,locpath) VALUES (2,'boston3')
INSERT INTO [location_photos](locationid,locpath) VALUES (2,'boston4')

--there are not photos for chicago on purpose

INSERT INTO [location_photos](locationid,locpath) VALUES (4,'la1')
INSERT INTO [location_photos](locationid,locpath) VALUES (4,'la2')

Solution

  • I want to select rows from [locations] and order the results by the number of rows are associated with each location in [location_photos]

    You can use a correlated subquery in the ORDER BY clause:

    select l.*
    from locations l
    order by (select count(*) 
              from location_photos lp
              where lp.location_id = l.id
             ) desc;
    

    If you want the count in the query as well as the ordering criterion, just move the subquery to the select:

    select l.*,
           (select count(*) 
            from location_photos lp
            where lp.location_id = l.id
           ) as num_photos
    from locations l
    order by num_photos desc;