I'm currently trying to build a parent-child relationship on a table that does not contain this information.
it is a list of financial account numbers and a corresponding type, thats about it.
Now there is a logic to this though, header accounts group profitAndLoss accounts with a total at the end. headers can be nested and totals can be as well forming the parent child structure within this logic is my current task
I have gotten as far as to rank the changes in that should result in a switch in the parent account. But I'm a bit stuck at building my recursive query to move all the way to my desired output.
currently I'm at this level (Simplified example)
WITH accounts AS (
SELECT
accountNumber, accountType
FROM (VALUES
(1000, 'heading'),
(1001, 'heading'),
(1010, 'profitAndLoss'),
(1020, 'profitAndLoss'),
(1021, 'profitAndLoss'),
(1025, 'profitAndLoss'),
(1099, 'totalFrom'),
(1300, 'heading'),
(1310, 'profitAndLoss'),
(1320, 'profitAndLoss'),
(1321, 'profitAndLoss'),
(1399, 'totalFrom'),
(2000, 'totalFrom'),
(2200, 'heading'),
(2210, 'profitAndLoss'),
(2211, 'profitAndLoss')
) AS t(accountNumber, accountType)
),
rankedAccounts AS (
SELECT
*,
COALESCE(
CASE WHEN LAG(accountNumber) OVER(ORDER BY accountNumber) IS NULL THEN 0 END,
CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('heading', 'headingStart') AND LAG(accountType, 2) OVER (ORDER BY accountNumber) IS NULL THEN 1 END,
CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('heading', 'headingStart') THEN 0 END,
CASE WHEN accountType IN ('profitAndLoss', 'status') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('heading', 'headingStart') THEN 1 END,
CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') AND LAG(accountType, 2) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') THEN 0 END,
CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') AND LAG(accountType, 2) OVER (ORDER BY accountNumber) IN ('profitAndLoss', 'status') THEN -1 END,
CASE WHEN accountType IN ('totalFrom', 'sumInterval') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') THEN -1 END,
0
) Rank
FROM
accounts
),
sumRank AS (
SELECT
accountNumber,
accountType,
SUM(Rank) OVER (ORDER BY rankedAccounts.accountNumber) sumRank
FROM rankedAccounts
)
SELECT
*
FROM
sumRank
The resulting data from this query is a follows
my desired output would look like this
I can do this easily in python using a very simple loop, but I cannot wrap my head around it in T-SQL
accountDictList = [
{'accountNumber': 1000, 'accountType':'heading', 'sumRank':0},
{'accountNumber': 1001, 'accountType':'heading', 'sumRank':1},
{'accountNumber': 1010, 'accountType':'profitAndLoss', 'sumRank':2},
{'accountNumber': 1020, 'accountType':'profitAndLoss', 'sumRank':2},
{'accountNumber': 1021, 'accountType':'profitAndLoss', 'sumRank':2},
{'accountNumber': 1025, 'accountType':'profitAndLoss', 'sumRank':2},
{'accountNumber': 1099, 'accountType':'totalFrom', 'sumRank':2},
{'accountNumber': 1300, 'accountType':'heading', 'sumRank':1},
{'accountNumber': 1310, 'accountType':'profitAndLoss', 'sumRank':2},
{'accountNumber': 1320, 'accountType':'profitAndLoss', 'sumRank':2},
{'accountNumber': 1321, 'accountType':'profitAndLoss', 'sumRank':2},
{'accountNumber': 1399, 'accountType':'totalFrom', 'sumRank':2},
{'accountNumber': 2000, 'accountType':'totalFrom', 'sumRank':1},
{'accountNumber': 2200, 'accountType':'heading', 'sumRank':1},
{'accountNumber': 2210, 'accountType':'profitAndLoss', 'sumRank':2},
{'accountNumber': 2211, 'accountType':'profitAndLoss', 'sumRank':2},
]
for i, account in enumerate(accountDictList):
try:
if i == 0:
parent = None
elif account['sumRank'] > accountDictList[i-1]['sumRank']:
grandparent = parent
parent = accountDictList[i-1]['accountNumber']
elif account['sumRank'] < accountDictList[i-1]['sumRank']:
parent = grandparent
except:
parent = parent
print(account, parent)
resulting in the desired output
In Python you have variables, in sql not, so you need subqueries to get the wanted information
WITH accounts AS (
SELECT
accountNumber, accountType
FROM (VALUES
(1000, 'heading'),
(1001, 'heading'),
(1010, 'profitAndLoss'),
(1020, 'profitAndLoss'),
(1021, 'profitAndLoss'),
(1025, 'profitAndLoss'),
(1099, 'totalFrom'),
(1300, 'heading'),
(1310, 'profitAndLoss'),
(1320, 'profitAndLoss'),
(1321, 'profitAndLoss'),
(1399, 'totalFrom'),
(2000, 'totalFrom'),
(2200, 'heading'),
(2210, 'profitAndLoss'),
(2211, 'profitAndLoss')
) AS t(accountNumber, accountType)
),
rankedAccounts AS (
SELECT
*,
COALESCE(
CASE WHEN LAG(accountNumber) OVER(ORDER BY accountNumber) IS NULL THEN 0 END,
CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('heading', 'headingStart') AND LAG(accountType, 2) OVER (ORDER BY accountNumber) IS NULL THEN 1 END,
CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('heading', 'headingStart') THEN 0 END,
CASE WHEN accountType IN ('profitAndLoss', 'status') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('heading', 'headingStart') THEN 1 END,
CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') AND LAG(accountType, 2) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') THEN 0 END,
CASE WHEN accountType IN ('heading', 'headingStart') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') AND LAG(accountType, 2) OVER (ORDER BY accountNumber) IN ('profitAndLoss', 'status') THEN -1 END,
CASE WHEN accountType IN ('totalFrom', 'sumInterval') AND LAG(accountType) OVER (ORDER BY accountNumber) IN ('totalFrom', 'sumInterval') THEN -1 END,
0
) Rank
FROM
accounts
),
sumRank AS (
SELECT
accountNumber,
accountType,
SUM(Rank) OVER (ORDER BY rankedAccounts.accountNumber) sumRank
FROM rankedAccounts
)
SELECT
*,
CASE WHEN sumrank = 0 then NULL
WHEN sumrank = 1 THEN ( SELECT accountNumber FROM sumRank WHERE sumRank = 0)
ELSE ( SELECT accountNumber FROM sumRank WHERE sumRank = 1 AND accountNumber < s1.accountNumber ORDER BY accountnumber DESC LIMIT 1)
END parent
FROM
sumRank s1
accountnumber | accounttype | sumrank | parent |
---|---|---|---|
1000 | heading | 0 | null |
1001 | heading | 1 | 1000 |
1010 | profitAndLoss | 2 | 1001 |
1020 | profitAndLoss | 2 | 1001 |
1021 | profitAndLoss | 2 | 1001 |
1025 | profitAndLoss | 2 | 1001 |
1099 | totalFrom | 2 | 1001 |
1300 | heading | 1 | 1000 |
1310 | profitAndLoss | 2 | 1300 |
1320 | profitAndLoss | 2 | 1300 |
1321 | profitAndLoss | 2 | 1300 |
1399 | totalFrom | 2 | 1300 |
2000 | totalFrom | 1 | 1000 |
2200 | heading | 1 | 1000 |
2210 | profitAndLoss | 2 | 2200 |
2211 | profitAndLoss | 2 | 2200 |