Search code examples
t-sqlaggregate-functionssql-server-2014

Sum a Specific Column When Multiple Records Exist


I'm trying to sum a specific column to get a number total for each ID - but my result set is returning a result set that is a multiple of 6.

For example, the request_ID in my query has six docket_IDs associated with it and each docket_ID has six message_document_IDs associated.

Query:

SELECT r.id,
r.number_of_documents,
rc.docket_id,
rc.sequence_no,
rc.case_number_from_filer,
rd.number_of_pages,
rd.message_document_id,
r.received_date
FROM dbo.requests AS [r]
INNER JOIN dbo.request_court_documents_additonal_information AS [rc]
    ON r.id = rc.request_id
INNER JOIN dbo.request_document_additonal_information AS [rd]
    ON rd.request_id = r.id
WHERE CAST(r.received_date AS DATE) >= '10/1/2015'
  AND CAST(r.received_date AS DATE) <= '9/30/2016'
  AND rc.case_number_from_filer = 'NEW CASE'
  AND r.id = 32700637
ORDER BY r.id,
rc.docket_id,
rd.message_document_id;

The data set looks like:

id        docket_id  case_number  nbr_of_pgs  msg_doc_id  received_date
32700637  197085524  NEW CASE     18          DOC00001    2015-10-01 00:01:32.590
32700637  197085524  NEW CASE     1           DOC00002    2015-10-01 00:01:32.590
32700637  197085524  NEW CASE     4           DOC00003    2015-10-01 00:01:32.590
32700637  197085524  NEW CASE     9           DOC00004    2015-10-01 00:01:32.590
32700637  197085524  NEW CASE     9           DOC00005    2015-10-01 00:01:32.590
32700637  197085524  NEW CASE     3           DOC00006    2015-10-01 00:01:32.590
32700637  197085906  NEW CASE     18          DOC00001    2015-10-01 00:01:32.590
32700637  197085906  NEW CASE     1           DOC00002    2015-10-01 00:01:32.590
32700637  197085906  NEW CASE     4           DOC00003    2015-10-01 00:01:32.590
32700637  197085906  NEW CASE     9           DOC00004    2015-10-01 00:01:32.590
32700637  197085906  NEW CASE     9           DOC00005    2015-10-01 00:01:32.590
32700637  197085906  NEW CASE     3           DOC00006    2015-10-01 00:01:32.590
32700637  197085941  NEW CASE     18          DOC00001    2015-10-01 00:01:32.590
32700637  197085941  NEW CASE     1           DOC00002    2015-10-01 00:01:32.590
32700637  197085941  NEW CASE     4           DOC00003    2015-10-01 00:01:32.590
32700637  197085941  NEW CASE     9           DOC00004    2015-10-01 00:01:32.590
32700637  197085941  NEW CASE     9           DOC00005    2015-10-01 00:01:32.590
32700637  197085941  NEW CASE     3           DOC00006    2015-10-01 00:01:32.590

This repeats itself three more times - so I return 36 rows worth of data and when I try to SUM (rd.number_of_pages) - my result is 6 times what I am expecting: 264 when it should be 44.

SELECT r.id,
SUM(rd.number_of_pages) AS [Pages]
FROM dbo.requests AS [r]
INNER JOIN dbo.request_court_documents_additonal_information AS [rc]
    ON r.id = rc.request_id
INNER JOIN dbo.request_document_additonal_information AS [rd]
    ON rd.request_id = r.id
WHERE CAST(r.received_date AS DATE) >= '10/1/2015'
  AND CAST(r.received_date AS DATE) <= '9/30/2016'
  AND rc.case_number_from_filer = 'NEW CASE'
  AND r.id = 32700637
GROUP BY r.id;

How can I eliminate the duplicate values from the docket_ids to return a true summation of the records?

Thanks,


Solution

  • Here's a way usint a correlated sub-query

    if object_id('tempdb..#t') is not null drop table #t
    
    create table #t (id int, docket_id int, case_number varchar(64), nbr_of_pgs int, msg_doc_id varchar(64), received_date datetime)
    insert into #t values
    (32700637,197085524,'NEW CASE',18,'DOC00001','2015-10-01 00:01:32.590'),
    (32700637,197085524,'NEW CASE',1,'DOC00002','2015-10-01 00:01:32.590'),
    (32700637,197085524,'NEW CASE',4,'DOC00003','2015-10-01 00:01:32.590'),
    (32700637,197085524,'NEW CASE',9,'DOC00004','2015-10-01 00:01:32.590'),
    (32700637,197085524,'NEW CASE',9,'DOC00005','2015-10-01 00:01:32.590'),
    (32700637,197085524,'NEW CASE',3,'DOC00006','2015-10-01 00:01:32.590'),
    (32700637,197085906,'NEW CASE',18,'DOC00001','2015-10-01 00:01:32.590'),
    (32700637,197085906,'NEW CASE',1,'DOC00002','2015-10-01 00:01:32.590'),
    (32700637,197085906,'NEW CASE',4,'DOC00003','2015-10-01 00:01:32.590'),
    (32700637,197085906,'NEW CASE',9,'DOC00004','2015-10-01 00:01:32.590'),
    (32700637,197085906,'NEW CASE',9,'DOC00005','2015-10-01 00:01:32.590'),
    (32700637,197085906,'NEW CASE',3,'DOC00006','2015-10-01 00:01:32.590'),
    (32700637,197085941,'NEW CASE',18,'DOC00001','2015-10-01 00:01:32.590'),
    (32700637,197085941,'NEW CASE',1,'DOC00002','2015-10-01 00:01:32.590'),
    (32700637,197085941,'NEW CASE',4,'DOC00003','2015-10-01 00:01:32.590'),
    (32700637,197085941,'NEW CASE',9,'DOC00004','2015-10-01 00:01:32.590'),
    (32700637,197085941,'NEW CASE',9,'DOC00005','2015-10-01 00:01:32.590'),
    (32700637,197085941,'NEW CASE',3,'DOC00006','2015-10-01 00:01:32.590')
    
    select distinct id, ct as TotalNum 
    from(
        select distinct
            id,
            docket_id,
            sum(nbr_of_pgs) as ct
        from 
            #t
        group by
            id, docket_id)x
    

    Your Query

    select distinct id, pages
    from(
        SELECT 
            r.id,
            rc.docket_id,
            SUM(rd.number_of_pages) AS [Pages]
        FROM dbo.requests AS [r]
        INNER JOIN dbo.request_court_documents_additonal_information AS [rc]
            ON r.id = rc.request_id
        INNER JOIN dbo.request_document_additonal_information AS [rd]
            ON rd.request_id = r.id
        WHERE CAST(r.received_date AS DATE) >= '10/1/2015'
          AND CAST(r.received_date AS DATE) <= '9/30/2016'
          AND rc.case_number_from_filer = 'NEW CASE'
          AND r.id = 32700637
        GROUP BY r.id, rc.docket_id) x