Search code examples
sqlrecursionhierarchy

Creating hierarchy in SQL Table


I'm currently trying to build a parent-child relationship on a table that does not contain this information.

it is a list of financial account numbers and a corresponding type, thats about it.

Now there is a logic to this though, header accounts group profitAndLoss accounts with a total at the end. headers can be nested and totals can be as well forming the parent child structure within this logic is my current task

I have gotten as far as to rank the changes in that should result in a switch in the parent account. But I'm a bit stuck at building my recursive query to move all the way to my desired output.

currently I'm at this level (Simplified example)

WITH accounts AS (
    SELECT 
        accountNumber, accountType

    FROM (VALUES
        (1000, 'heading'),
        (1001, 'heading'),
        (1010, 'profitAndLoss'),
        (1020, 'profitAndLoss'),
        (1021, 'profitAndLoss'),
        (1025, 'profitAndLoss'),
        (1099, 'totalFrom'),
        (1300, 'heading'),
        (1310, 'profitAndLoss'),
        (1320, 'profitAndLoss'),
        (1321, 'profitAndLoss'),
        (1399, 'totalFrom'),
        (2000, 'totalFrom'),
        (2200, 'heading'),
        (2210, 'profitAndLoss'),
        (2211, 'profitAndLoss')
    ) AS t(accountNumber, accountType)
),

rankedAccounts AS (
    SELECT
        *,
        COALESCE(
            CASE WHEN LAG(accountNumber) OVER(ORDER BY accountNumber) IS NULL THEN 0 END,
            CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('heading', 'headingStart') AND LAG(accountType, 2) OVER (ORDER BY accountNumber) IS NULL THEN 1 END,
            CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('heading', 'headingStart') THEN 0 END,
            CASE WHEN accountType IN ('profitAndLoss', 'status') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('heading', 'headingStart') THEN 1 END,
            CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') AND LAG(accountType, 2) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') THEN 0 END,
            CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') AND LAG(accountType, 2) OVER (ORDER BY accountNumber) IN ('profitAndLoss', 'status') THEN -1 END,
            CASE WHEN accountType IN ('totalFrom', 'sumInterval') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') THEN -1 END,
            0
        ) Rank

    FROM
        accounts
),

sumRank AS (
    SELECT 
        accountNumber,
        accountType,
        SUM(Rank) OVER (ORDER BY rankedAccounts.accountNumber) sumRank
    
    FROM rankedAccounts
)

SELECT
    *

FROM
    sumRank

The resulting data from this query is a follows

enter image description here

my desired output would look like this

enter image description here

I can do this easily in python using a very simple loop, but I cannot wrap my head around it in T-SQL

accountDictList = [
    {'accountNumber': 1000, 'accountType':'heading', 'sumRank':0},
    {'accountNumber': 1001, 'accountType':'heading', 'sumRank':1},
    {'accountNumber': 1010, 'accountType':'profitAndLoss', 'sumRank':2},
    {'accountNumber': 1020, 'accountType':'profitAndLoss', 'sumRank':2},
    {'accountNumber': 1021, 'accountType':'profitAndLoss', 'sumRank':2},
    {'accountNumber': 1025, 'accountType':'profitAndLoss', 'sumRank':2},
    {'accountNumber': 1099, 'accountType':'totalFrom', 'sumRank':2},
    {'accountNumber': 1300, 'accountType':'heading', 'sumRank':1},
    {'accountNumber': 1310, 'accountType':'profitAndLoss', 'sumRank':2},
    {'accountNumber': 1320, 'accountType':'profitAndLoss', 'sumRank':2},
    {'accountNumber': 1321, 'accountType':'profitAndLoss', 'sumRank':2},
    {'accountNumber': 1399, 'accountType':'totalFrom', 'sumRank':2},
    {'accountNumber': 2000, 'accountType':'totalFrom', 'sumRank':1},
    {'accountNumber': 2200, 'accountType':'heading', 'sumRank':1},
    {'accountNumber': 2210, 'accountType':'profitAndLoss', 'sumRank':2},
    {'accountNumber': 2211, 'accountType':'profitAndLoss', 'sumRank':2},
]

for i, account in enumerate(accountDictList):
    try:
        if i == 0:
            parent = None
        elif account['sumRank'] > accountDictList[i-1]['sumRank']:
            grandparent = parent
            parent = accountDictList[i-1]['accountNumber']
        elif account['sumRank'] < accountDictList[i-1]['sumRank']:
            parent = grandparent
    except:
        parent = parent

    print(account, parent)

resulting in the desired output

enter image description here


Solution

  • In Python you have variables, in sql not, so you need subqueries to get the wanted information

    WITH accounts AS (
        SELECT 
            accountNumber, accountType
    
        FROM (VALUES
            (1000, 'heading'),
            (1001, 'heading'),
            (1010, 'profitAndLoss'),
            (1020, 'profitAndLoss'),
            (1021, 'profitAndLoss'),
            (1025, 'profitAndLoss'),
            (1099, 'totalFrom'),
            (1300, 'heading'),
            (1310, 'profitAndLoss'),
            (1320, 'profitAndLoss'),
            (1321, 'profitAndLoss'),
            (1399, 'totalFrom'),
            (2000, 'totalFrom'),
            (2200, 'heading'),
            (2210, 'profitAndLoss'),
            (2211, 'profitAndLoss')
        ) AS t(accountNumber, accountType)
    ),
    
    rankedAccounts AS (
        SELECT
            *,
            COALESCE(
                CASE WHEN LAG(accountNumber) OVER(ORDER BY accountNumber) IS NULL THEN 0 END,
                CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('heading', 'headingStart') AND LAG(accountType, 2) OVER (ORDER BY accountNumber) IS NULL THEN 1 END,
                CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('heading', 'headingStart') THEN 0 END,
                CASE WHEN accountType IN ('profitAndLoss', 'status') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('heading', 'headingStart') THEN 1 END,
                CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') AND LAG(accountType, 2) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') THEN 0 END,
                CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') AND LAG(accountType, 2) OVER (ORDER BY accountNumber) IN ('profitAndLoss', 'status') THEN -1 END,
                CASE WHEN accountType IN ('totalFrom', 'sumInterval') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') THEN -1 END,
                0
            ) Rank
    
        FROM
            accounts
    ),
    
    sumRank AS (
        SELECT 
            accountNumber,
            accountType,
            SUM(Rank) OVER (ORDER BY rankedAccounts.accountNumber) sumRank
        
        FROM rankedAccounts
    )
    
    SELECT
        *,
     CASE WHEN sumrank = 0 then NULL
      WHEN sumrank = 1 THEN ( SELECT accountNumber FROM sumRank WHERE sumRank = 0)
      ELSE ( SELECT accountNumber FROM sumRank WHERE sumRank = 1 AND accountNumber < s1.accountNumber ORDER BY accountnumber DESC LIMIT 1)
      END parent
    FROM
        sumRank s1
    
    
    accountnumber accounttype sumrank parent
    1000 heading 0 null
    1001 heading 1 1000
    1010 profitAndLoss 2 1001
    1020 profitAndLoss 2 1001
    1021 profitAndLoss 2 1001
    1025 profitAndLoss 2 1001
    1099 totalFrom 2 1001
    1300 heading 1 1000
    1310 profitAndLoss 2 1300
    1320 profitAndLoss 2 1300
    1321 profitAndLoss 2 1300
    1399 totalFrom 2 1300
    2000 totalFrom 1 1000
    2200 heading 1 1000
    2210 profitAndLoss 2 2200
    2211 profitAndLoss 2 2200

    fiddle