Search code examples
sql-servert-sqlsql-server-2014

Get comma-separated set of values from table where another reference value on another table appears twice (or more)


Suppose the following DB setup in SQL Server 2014:

DECLARE @MATERIAL TABLE (ID int, CODE varchar(30));

INSERT @MATERIAL (ID, CODE) VALUES
(1, 'D3033MBBY'),
(2, 'D3033MBTY'),
(3, '011130-01'),
(4, '011130-04C'),
(5, '021002'),
(6, '021017-B'),
(7, '021134-01'),
(8, '021135-01'),
(9, '021955-01'),
(10, '3LS91101-550'),
(11, 'D3049MBRB'),
(12, 'EF0118'),
(13, 'FV8130'),
(14, 'FY7009'),
(15, 'H05802'),
(16, 'D3033MRTE');

DECLARE @SUBSTITUTE TABLE (ID int, ITEID int, SUBSTITUTECODE varchar(100));

INSERT @SUBSTITUTE (ID, ITEID, SUBSTITUTECODE) VALUES
(5232, 1, '191045762418'),
(5442, 2, '191045762418'),
(6435, 3, '5206432380030'),
(6573, 4, '5206432380030'),
(6582, 5, '5206432357131'),
(6683, 6, '5206432369486'),
(7332, 7, '5206432380610'),
(7482, 8, '5206432380818'),
(7721, 9, '5206432346029'),
(7831, 10, '5205172116350'),
(8034, 11, '191045480992'),
(8184, 12, '4061622759543'),
(8284, 13, '4062058577497'),
(8573, 14, '4064039588089'),
(9438, 15, '4064048672519'),
(9746, 16, '191045762418');

SELECT sub.SUBSTITUTECODE
FROM @SUBSTITUTE AS sub
INNER JOIN @MATERIAL AS prod ON prod.ID = sub.ITEID
GROUP BY sub.SUBSTITUTECODE
HAVING COUNT(sub.SUBSTITUTECODE) > 1;

I would like to create a query that would produce the following resultset:

CODES SUBSTITUTECODE
D3033MBBY,D3033MBTY,D3033MRTE 191045762418
011130-01,011130-04C 5206432380030

In other words, I'd like to get a comma-separated set of CODEs in @MATERIAL where there are duplicate SUBSTITUTECODE references for those records in table @SUBSTITUTE

Indirectly, I can find the CODEs that correspond to those duplicate SUBSTITUTECODEs with the following query:

SELECT prod.CODE, sub.SUBSTITUTECODE
FROM @SUBSTITUTE AS sub
INNER JOIN @MATERIAL AS prod ON prod.ID = sub.ITEID
WHERE sub.SUBSTITUTECODE IN (SELECT sub.SUBSTITUTECODE
    FROM @SUBSTITUTE AS sub
    INNER JOIN @MATERIAL AS prod ON prod.ID = sub.ITEID
    GROUP BY sub.SUBSTITUTECODE
    HAVING COUNT(sub.SUBSTITUTECODE) > 1)

Working fiddle for the above case can be found here.

Please note that the full case of this scenario runs on SQL Server 2014.

TIA


Solution

  • Nice starting fiddle, thanks! If we just take what you already have and put it in a CTE, we can write a standard string aggregation around it:

    ;WITH subs AS 
    (
      SELECT prod.CODE, sub.SUBSTITUTECODE
      FROM @SUBSTITUTE AS sub
      INNER JOIN @MATERIAL AS prod ON prod.ID = sub.ITEID
      WHERE sub.SUBSTITUTECODE IN (SELECT sub.SUBSTITUTECODE
        FROM @SUBSTITUTE AS sub
        INNER JOIN @MATERIAL AS prod ON prod.ID = sub.ITEID
        GROUP BY sub.SUBSTITUTECODE
        HAVING COUNT(sub.SUBSTITUTECODE) > 1)
    )
    SELECT CODES = STUFF((SELECT ',' + CODE 
      FROM subs AS s2 WHERE s2.SUBSTITUTECODE = subs.SUBSTITUTECODE
      FOR XML PATH(''), TYPE).value(N'./text()[1]', N'nvarchar(max)'),1,1,''),
        SUBSTITUTECODE FROM subs
      GROUP BY SUBSTITUTECODE;
    

    But we can simplify this code slightly, most importantly to avoid referencing both tables twice, like this:

    ;WITH subs AS
    (
      SELECT s.ITEID, s.SUBSTITUTECODE, m.CODE, 
        c = COUNT(*) OVER (PARTITION BY s.SUBSTITUTECODE)
      FROM @SUBSTITUTE AS s
      INNER JOIN @MATERIAL AS m
      ON m.ID = s.ITEID
    )
    SELECT CODES = STUFF((SELECT ',' + CODE
      FROM subs AS s2 WHERE s2.SUBSTITUTECODE = subs.SUBSTITUTECODE
      FOR XML PATH(''), TYPE).value(N'./text()[1]', N'nvarchar(max)'),1,1,''),
        SUBSTITUTECODE
      FROM subs 
      WHERE c > 1 
      GROUP BY SUBSTITUTECODE;
    

    Note that on more modern versions of SQL Server (2017+), STRING_AGG() makes this much easier:

    SELECT CODES = STRING_AGG(m.CODE, ','), s.SUBSTITUTECODE
      FROM @SUBSTITUTE AS s
      INNER JOIN @MATERIAL AS m
      ON m.ID = s.ITEID
      GROUP BY s.SUBSTITUTECODE
      HAVING COUNT(*) > 1;