I am trying to group a list of accounts if they share same phone numbers directly or indirectly using SQL query in recursive way. I was trying traversing the parent-child hierarchy like most SQL recursive examples do, but it doesn't work because each parent in my case could have multiple children. I am expecting the query to return all three accounts within the same group.
e.g.
AccountID PhoneNumber
1 1111111111
1 2222222222
2 1111111111
2 4444444444
3 3333333333
3 4444444444
I have tried common approach to this recursive problem in SQL by defining base case and recursive case, but I am getting "The maximum recursion 100 has been exhausted before statement completion." error.
-- Step 1: Define a recursive CTE to find connected components
WITH ConnectedAccounts AS (
-- Base case: Select each account and its direct connections
SELECT
a.AccountID AS RootAccount,
a.AccountID,
a.PhoneNumber,
level = 0
FROM
AccountPhoneNumbers a
UNION ALL
-- Recursive case: Connect accounts sharing a phone number
SELECT
c.RootAccount,
b.AccountID,
b.PhoneNumber,
c.level + 1
FROM
AccountPhoneNumbers b
JOIN ConnectedAccounts c ON b.PhoneNumber = c.PhoneNumber
WHERE
b.AccountID <> c.AccountID -- Prevent looping back to the same account
),
-- Step 2: Select distinct groupings from the recursive result
DistinctGroups AS (
SELECT DISTINCT
RootAccount,
AccountID
FROM
ConnectedAccounts
)
-- Step 3: Aggregate accounts into groups based on their connected root account
SELECT
RootAccount,
STRING_AGG(AccountID, ',') AS ConnectedAccounts
FROM
DistinctGroups
GROUP BY
RootAccount
Here's a couple of versions.
The graph (might be slow):
with
nodes as (
v(grp)
select accountid as id, phonenumber as grp
from
(
VALUES (1, 1111111111)
, (1, 2222222222)
, (2, 1111111111)
, (2, 4444444444)
, (3, 3333333333)
, (3, 4444444444)
, (4, 11111111113)
, (5, 11111111113)
, (5, 1111111111)
) t (AccountID,PhoneNumber)
),
edges as (
select distinct n1.id as id1, n2.id as id2
from nodes n1
inner join nodes n2 on n1.grp = n2.grp
),
rec as (
select id1, id2, cast(id1 as nvarchar(max)) as visited from edges
union all
select r.id1, e.id2, concat(r.visited, ',', e.id2)
from rec r
inner join edges e on e.id1 = r.id2
where concat(',', r.visited, ',') not like concat('%,', e.id2, ',%')
),
fin as (
select id1, min(value) min_id
from rec r
cross apply string_split(r.visited, ',')
group by id1
)
select id1 as id, dense_rank() over(order by min_id) grp
from fin f
The loop (probably needs index on phone):
SELECT AccountID AS ID, PhoneNumber AS Number, CAST(NULL AS INT) AS ID_To
INTO #t
FROM
(
VALUES (1, 1111111111)
, (1, 2222222222)
, (2, 1111111111)
, (2, 4444444444)
, (3, 3333333333)
, (3, 4444444444)
, (4, 11111111113)
, (5, 11111111113)
, (5, 1111111111)
, (6, 4444444444)
, (7, 3333333333)
, (8, 13)
, (8, 7)
, (8, 3)
, (9, 13)
, (10, 14)
) t (AccountID,PhoneNumber)
-- Create index on Number
WHILE @@rowcount > 0
BEGIN
UPDATE t3
SET ID_TO = x.ID_to
FROm (
SELECT t2.ID, ID_to = MIN(ISNULL(t.ID_To, t.ID))
FROM #t t
INNER JOIN #t t2
ON ISNULL(t.id_To, t.id) < ISNULL(t2.ID_to, t2.id)
AND t.Number = t2.Number
AND t.ID <> t2.ID
GROUP BY t2.ID
) x
INNER JOIN #t t3 -- Update ALL rows to the lowest value
ON t3.ID = x.ID
END
-- See created groups
SELECT ISNULL(ID_TO, ID) AS groups
FROM #t
GROUP BY ISNULL(ID_TO, ID)