I have a table like this:
t104f005_employee_no | t104f040_position_no | t104f025_date_effective | t104f030_date_to |
---|---|---|---|
11354 | 89043 | 01/07/1999 | 01/04/2012 |
11354 | 89043 | 02/04/2012 | 31/08/2014 |
11354 | 89043 | 01/09/2014 | 21/09/2014 |
11354 | 89043 | 22/09/2014 | 27/09/2015 |
11354 | 23273 | 04/05/2015 | 27/09/2015 |
11354 | 94040 | 28/09/2015 | 10/01/2016 |
11354 | 94040 | 11/01/2016 | 08/05/2017 |
11354 | 94040 | 09/05/2017 | 24/03/2019 |
11354 | 99406 | 26/02/2018 | 01/07/2018 |
11354 | 99406 | 02/07/2018 | 30/09/2018 |
11354 | 99406 | 01/10/2018 | 01/11/2018 |
11354 | 97293 | 02/11/2018 | 25/11/2018 |
11354 | 99406 | 26/11/2018 | 10/03/2019 |
11354 | 97293 | 11/03/2019 | 24/03/2019 |
11354 | 99406 | 25/03/2019 | 30/09/2019 |
11354 | 99406 | 01/10/2019 | 15/03/2020 |
11354 | 97293 | 11/11/2019 | 24/11/2019 |
11354 | 99406 | 16/03/2020 | 10/05/2020 |
11354 | 101808 | 11/05/2020 | 14/06/2020 |
11354 | 99406 | 11/05/2020 | 14/06/2020 |
11354 | 101808 | 15/06/2020 | 04/10/2020 |
11354 | 99406 | 15/06/2020 | 04/10/2020 |
11354 | 101808 | 05/10/2020 | 25/10/2020 |
11354 | 99406 | 05/10/2020 | 21/11/2021 |
11354 | 101808 | 26/10/2020 | 31/01/2021 |
11354 | 101808 | 01/02/2021 | 28/02/2021 |
11354 | 101808 | 01/03/2021 | 30/06/2021 |
11354 | 101806 | 01/07/2021 | 18/07/2021 |
11354 | 101808 | 19/07/2021 | 21/11/2021 |
11354 | 101808 | 22/11/2021 | 28/11/2021 |
11354 | 99406 | 22/11/2021 | 28/11/2021 |
11354 | 101808 | 29/11/2021 | 20/03/2022 |
11354 | 101806 | 21/03/2022 | 17/04/2022 |
11354 | 101808 | 21/03/2022 | 22/05/2022 |
11354 | 101808 | 23/05/2022 | 09/04/2023 |
11354 | 101903 | 15/08/2022 | 12/02/2023 |
11354 | 105009 | 13/02/2023 | 28/02/2023 |
11354 | 105009 | 01/03/2023 | 09/04/2023 |
11354 | 101808 | 10/04/2023 | NULL |
11354 | 80717 | 15/05/2023 | 12/05/2024 |
And I need to be able to identify the continuous position no periods with the min start and max end date outputted for each continuous period. Expected outcome would look like this:
t104f005_employee_no | t104f040_position_no | t104f025_date_effective | t104f030_date_to |
---|---|---|---|
11354 | 89043 | 01/07/1999 | 27/09/2015 |
11354 | 23273 | 04/05/2015 | 27/09/2015 |
11354 | 94040 | 28/09/2015 | 24/03/2019 |
11354 | 99406 | 26/02/2018 | 01/11/2018 |
11354 | 97293 | 02/11/2018 | 25/11/2018 |
11354 | 99406 | 26/11/2018 | 10/03/2019 |
11354 | 97293 | 11/03/2019 | 24/03/2019 |
11354 | 99406 | 25/03/2019 | 28/11/2021 |
11354 | 97293 | 11/11/2019 | 24/11/2019 |
11354 | 101808 | 11/05/2020 | 30/06/2021 |
11354 | 101806 | 01/07/2021 | 18/07/2021 |
11354 | 101808 | 19/07/2021 | NULL |
11354 | 101806 | 21/03/2022 | 17/04/2022 |
11354 | 101903 | 15/08/2022 | 12/02/2023 |
11354 | 105009 | 13/02/2023 | 28/02/2023 |
11354 | 105009 | 01/03/2023 | 09/04/2023 |
11354 | 80717 | 15/05/2023 | 12/05/2024 |
I have tried the following methods but it's not working out and I'm a bit stumped now.
WITH RankedPositions AS (
SELECT
[t104f005_employee_no],
[t104f040_position_no],
[t104f025_date_effective],
[t104f030_date_to],
ROW_NUMBER() OVER (PARTITION BY [t104f005_employee_no] ORDER BY [t104f025_date_effective]) -
ROW_NUMBER() OVER (PARTITION BY [t104f005_employee_no], [t104f040_position_no] ORDER BY [t104f025_date_effective]) AS grp
FROM [AUR11PROD].[dbo].[t104_employment_history] with (nolock)
WHERE [t104f005_employee_no] = '11354'
)
SELECT
[t104f005_employee_no],
[t104f040_position_no],
MIN([t104f025_date_effective]) AS min_startdate,
MAX([t104f030_date_to]) AS max_enddate
FROM RankedPositions
GROUP BY [t104f005_employee_no], [t104f040_position_no], grp
ORDER BY [t104f005_employee_no], min_startdate;
and this one
WITH RECURSIVE ContinuousPositions AS (
SELECT
employee_no,
position_no,
startdate,
enddate
FROM your_table_name
WHERE NOT EXISTS (
SELECT 1
FROM your_table_name AS t2
WHERE t2.employee_no = your_table_name.employee_no
AND t2.position_no = your_table_name.position_no
AND t2.startdate < your_table_name.startdate
)
UNION ALL
SELECT
cp.employee_no,
cp.position_no,
cp.startdate,
t.enddate
FROM ContinuousPositions AS cp
JOIN your_table_name AS t ON (
cp.employee_no = t.employee_no
AND cp.position_no = t.position_no
AND cp.enddate = DATEADD(day, -1, t.startdate)
)
)
SELECT
employee_no,
position_no,
MIN(startdate) AS min_startdate,
MAX(enddate) AS max_enddate
FROM ContinuousPositions
GROUP BY employee_no, position_no
ORDER BY employee_no, min_startdate;
This is a gaps and islands problem. One way is to use LEAD()
or LAG()
to identify when the continuous date break and set a flag (g
). Performing a cumulative sum() on the flag gives you the required grouping (grp
).
with
cte as
(
select employee_no, position_no, date_effective, date_expiry,
g = case when dateadd(day, -1, date_effective)
<> lag(date_expiry) over (partition by employee_no, position_no
order by date_effective)
then 1
else 0
end
from employment_history
),
cte2 as
(
select employee_no, position_no, date_effective,
date_expiry = isnull(date_expiry, '99991231'),
grp = sum(g) over (partition by employee_no, position_no
order by date_effective)
from cte
)
select employee_no, position_no,
date_effective = min(date_effective),
date_expiry = nullif(max(date_expiry), '99991231')
from cte2
group by employee_no, position_no, grp
order by employee_no, date_effective