I just started in a new project, in a new company.
I was given a big and complex SQL, with about 1000 lines and MANY subqueries, joins, sums, group by, etc.
This SQL is used for report generation (it has no inserts nor updates).
The SQL has some flaws, and my first job in the company is to identify and correct these flaws so that the report shows the correct values (I know the correct values by accessing a legacy system written in Cobol...)
How can I make it easier for me to understand the query, so I can identify the flaws?
As an experienced Java programmer, I know how to refactor a complex bad written monolitic Java code into an easier to understand code with small pieces of code. But I have no clue on how to do that with SQL.
The SQL looks like this:
SELECT columns
FROM
(SELECT columns
FROM
(SELECT DISTINCT columns
FROM table000 alias000
INNER JOIN
table000 alias000
ON column000 = table000.column000
LEFT JOIN
(SELECT columns
FROM (
SELECT DISTINCT columns
FROM columns
WHERE conditions) AS alias000
GROUP BY columns ) alias000
ON
conditions
WHERE conditions
) AS alias000
LEFT JOIN
(SELECT
columns
FROM many_tables
WHERE many_conditions
) )
) AS alias000
ON condition
LEFT JOIN (
SELECT columns
FROM
(SELECT
columns
FROM
many_tables
WHERE many_conditions
) ) ) AS alias001
,
(SELECT
many_columns
FROM
many_tables
WHERE many_conditions) AS alias001
) AS alias001
ON condition
LEFT JOIN
(SELECT
many_columns
FROM many_tables
WHERE many_conditions
) AS alias001
ON condition
,
(SELECT DISTINCT columns
FROM table001 alias001
INNER JOIN
table001 alias001
ON condition
LEFT JOIN
(SELECT columns
FROM (
SELECT DISTINCT columns
FROM tables
WHERE conditions
) AS alias001
GROUP BY
columns ) alias001
ON
condition
WHERE
conditions
) AS alias001
LEFT JOIN
(SELECT columns
FROM tables
WHERE conditions
) AS alias001
ON condition
LEFT JOIN (
SELECT columns
FROM
(SELECT columns
FROM tables
WHERE conditions ) AS alias001
,
(SELECT
columns
FROM
tables
WHERE conditions ) AS alias001
) AS alias001
ON condition
LEFT JOIN
(SELECT
columns
FROM
tables
WHERE conditions
) AS alias001
ON condition
WHERE
condition
) AS alias001
order by column001
How can I make it easier for me to understand the query, so I can identify the flaws?
The solution was to simplify the query using COMMON TABLE EXPRESSIONS.
This allowed me to break the big and complex SQL query into many small and easy to understand queries.
COMMON TABLE EXPRESSIONS:
WITH cte (Column1, Column2, Column3)
AS
(
SELECT Column1, Column2, Column3
FROM SomeTable
)
SELECT * FROM cte
My new SQL looks like this:
------------------------------------------
--COMMON TABLE EXPRESSION 001--
------------------------------------------
WITH alias001 (column001, column002) AS (
SELECT column005, column006
FROM table001
WHERE condition001
GROUP by column008
)
--------------------------------------------
--COMMON TABLE EXPRESSION 002 --
--------------------------------------------
, alias002 (column009) as (
select distinct column009 from table002
)
--------------------------------------------
--COMMON TABLE EXPRESSION 003 --
--------------------------------------------
, alias003 (column1, column2, column3) as (
SELECT '1' AS column1, '1' as column2, 'name001' AS column3 FROM SYSIBM.SYSDUMMY1
UNION ALL
SELECT '1' AS column1, '1.1' as column2, 'name002' AS column3 FROM SYSIBM.SYSDUMMY1
UNION ALL
SELECT '1' AS column1, '1.2' as column2, 'name003' AS column3 FROM SYSIBM.SYSDUMMY1
UNION ALL
SELECT '2' AS column1, '2' as column2, 'name004' AS column3 FROM SYSIBM.SYSDUMMY1
UNION ALL
SELECT '2' AS column1, '2.1' as column2, 'name005' AS column3 FROM SYSIBM.SYSDUMMY1
UNION ALL
SELECT '2' AS column1, '2.2' as column2, 'name006' AS column3 FROM SYSIBM.SYSDUMMY1
UNION ALL
SELECT '3' AS column1, '3' as column2, 'name007' AS column3 FROM SYSIBM.SYSDUMMY1
UNION ALL
SELECT '3' AS column1, '3.1' as column2, 'name008' AS column3 FROM SYSIBM.SYSDUMMY1
)
--------------------------------------------
--COMMON TABLE EXPRESSION 004 --
--------------------------------------------
, alias004 (column1) as (
select distinct column1 from table003
)
------------------------------------------------------
--COMMON TABLE EXPRESSION 005 --
------------------------------------------------------
, alias005 (column1, column2) as (
select column1, column2 from alias002, alias004
)
------------------------------------------------------
--COMMON TABLE EXPRESSION 006 --
------------------------------------------------------
, alias006 (column1, column2, column3, column4) as (
SELECT column1, column2, column3, sum(column0) as column4
FROM table004
LEFT JOIN table005 ON column01 = column02
group by column1, column2, column3
)
------------------------------------------------------
--COMMON TABLE EXPRESSION 007 --
------------------------------------------------------
, alias007 (column1, column2, column3, column4) as (
SELECT column1, column2, column3, sum(column0) as column4
FROM table006
LEFT JOIN table007 ON column01 = column02
group by column1, column2, column3
)
------------------------------------------------------
--COMMON TABLE EXPRESSION 008 --
------------------------------------------------------
, alias008 (column1, column2, column3, column4) as (
select column1, column2, column3, column4 from alias007 where column5 = 123
)
----------------------------------------------------------
--COMMON TABLE EXPRESSION 009 --
----------------------------------------------------------
, alias009 (column1, column2, column3, column4) as (
select column1, column2,
CASE WHEN column3 IS NOT NULL THEN column3 ELSE 0 END as column3,
CASE WHEN column4 IS NOT NULL THEN column4 ELSE 0 END as column4
from table007
)
----------------------------------------------------------
--COMMON TABLE EXPRESSION 010 --
----------------------------------------------------------
, alias010 (column1, column2, column3) as (
select column1, sum(column4), sum(column5)
from alias009
where column6 < 2005
group by column1
)
--------------------------------------------
-- MAIN QUERY --
--------------------------------------------
select j.column1, n.column2, column3, column4, column5, column6,
column3 + column5 AS column7,
column4 + column6 AS column8
from alias010 j
left join alias006 m ON (m.column1 = j.column1)
left join alias008 n ON (n.column1 = j.column1)