Edit :- for any one with a similar problem, there's a good article covering various solutions here
Given the following tables recs and audit, how would one in SQL transform into the resultant table.
A little background, the former table is a simplified example of an standard SQL table used in a CRUD application collecting data. On any update to a column a record is written to an audit table in EAV form. There is now a requirement to transform the recs table into a historical table with a copy of each row as it was at a point in time for reporting (the data will be stored in a star schema data warehouse ultimately.
It seems like this would be straightforward enough in a procedural language and manageable (if ugly) using cursors, but is there a set based approach that would work?
I'm using T-SQL right now, but I imagine that I could port any examples or ideas from any sufficiently rich SQL dialect.
create table recs
(
ID int identity(1,1) not null primary key,
Column1 nvarchar(30) not null,
Column2 nvarchar(30) not null,
sys_updated_on datetime not null
)
create table audit
(
ID int identity(1,1) not null primary key,
recs_id int not null,
fieldname nvarchar(30) not null,
old_value nvarchar(30) not null,
new_value nvarchar(30) not null,
sys_updated_on datetime not null
)
insert into recs (Column1, Column2, sys_updated_on)
values ('A', 'B', '2012-10-31 22:00')
, ('C', 'D', '2012-10-31 22:30')
insert into audit (recs_id, fieldname, old_value, new_value, sys_updated_on)
values (1, 'Column1', 'Z', 'A', '2012-10-31 22:00')
, (2, 'Column2','X', 'D', '2012-10-31 22:30')
, (1, 'Column1', 'Y', 'Z', '2012-10-31 21:00')
Recs ID Column1 Column2 sys_updated_on 1 A B 31/10/2012 22:00:00 2 C D 31/10/2012 22:30:00
Audit ID recs_id fieldname old_value new_value sys_updated_on 1 1 Column1 Z A 31/10/2012 22:00:00 2 2 Column2 X D 31/10/2012 22:30:00 3 1 Column1 Y Z 31/10/2012 21:00:00
recs_id sys_updated_on Column1 Column2 1 null Y B 1 31/10/2012 21:00:00 Z B 1 31/10/2012 22:00:00 A B 2 null C X 2 31/10/2012 22:30:00 C D
Interesting....
Try this
;with cte as
(
select recs_id, sys_updated_on, column1, column2,
ROW_NUMBER() over (order by sys_updated_on) rn
from audit a
pivot
(max(old_value) for fieldname in (column1,column2)) p
)
select
recs_id,
case when ud1>ud2 then ud1 else ud2 end as updateddate,
coalesce(cte.column1,mc1,recs.column1),
coalesce(cte.column2,mc2,recs.column2)
from cte
outer apply
(
select top 1
column1 as mc1, sys_updated_on as ud1
from cte prev1
where prev1.recs_id=cte.recs_id and prev1.rn<cte.rn
order by prev1.rn desc
) r1
outer apply
(
select top 1
column2 as mc2, sys_updated_on as ud2
from cte prev2
where prev2.recs_id=cte.recs_id and prev2.rn<cte.rn
order by prev2.rn desc
) r2
inner join recs on cte.recs_id = recs.id
where cte.sys_updated_on is not null
union
select id, sys_updated_on, Column1, Column2 from recs
order by recs_id, cte.updateddate