I would like to produce hierarchical output in Snowflake using a CTE. Here are my two tables Dimension_territory
and territory_member_list
:
The dimension testdb.dbo.dimension_territory
contains the territory keys and the territory name
create table testdb.dbo.dimension_territory (
territory_key integer,
territory_name varchar ) ;
Here (1, 'World Wide')
is the root
insert into testdb.dbo.dimension_territory values
(1, 'WorldWide'),
(2, 'Western Hemisphere'),
(3, 'North America'),
(4, 'Canada') ;
The territory_member_list
table contains the parent child relations.
create table testdb.dbo.territory_member_list (
parent_territory_key integer,
child_territory_key integer );
insert into testdb.dbo.territory_member_list values
(1, 2), -- WorldWide , Western Hemisphere
(2, 3), -- Western Hemisphere , North America , North america under Western hemisphere
(3, 4) -- North America ,Canada , Canada under North America
In the first entry into testdb.dbo.territory_member_list (1, 2), -- WorldWide -> Western Hemisphere
the value 1 'WordlWide' is the parent of 2 'Western Hemisphere' (child) and likewise
The expected output of the CTE to derive the ancestors and the descendants should be the following table. Any help would be highly appreciated. Output table with the desired fields:
Using "breadcrumb" array as helper structure to determine position in the path(naive approach):
WITH RECURSIVE cte AS (
SELECT dt.*, tml.parent_territory_key
FROM dimension_territory dt
LEFT JOIN territory_member_list tml
ON tml.child_territory_key = dt.territory_key
), rec AS (
SELECT cte.TERRITORY_KEY, cte.TERRITORY_NAME, cte.parent_territory_key,
ARRAY_CONSTRUCT(cte.TERRITORY_KEY) AS a, 0 AS lvl
FROM cte
WHERE PARENT_TERRITORY_KEY IS NULL
UNION ALL
SELECT cte.TERRITORY_KEY, cte.TERRITORY_NAME, cte.parent_territory_key,
ARRAY_APPEND(rec.a, cte.TERRITORY_KEY), lvl+1
FROM rec
JOIN cte ON rec.TERRITORY_KEY = cte.parent_territory_key
), lognest_path AS (
SELECT * FROM rec QUALIFY lvl = MAX(lvl) OVER()
), cartesian AS (
SELECT dt1.TERRITORY_KEY AS ANCESTOR_KEY, dt1.TERRITORY_NAME AS ANCESTOR_NAME,
dt2.TERRITORY_KEY AS DESCENDANT_KEY, dt2.TERRITORY_NAME AS DESCENDANT_NAME
FROM dimension_territory dt1
CROSS JOIN dimension_territory dt2
)
SELECT DISTINCT c.*, lp.a,
ARRAY_POSITION(c.ancestor_key,lp.a) AS a_p,
ARRAY_POSITION(c.descendant_key, lp.a) AS d_p,
a_p = 0 AND d_p = 0 AS IS_ROOT,
d_p - a_p AS EDGE_DISTANCE
FROM cartesian c
JOIN lognest_path lp
ON ARRAY_POSITION(c.ancestor_key,lp.a) >= 0
AND ARRAY_POSITION(c.descendant_key, lp.a) >=0
WHERE a_p <= d_p
ORDER BY ANCESTOR_KEY, DESCENDANT_KEY;
Output: