I am creating a AWS glue job to extract data from a OLTP database into redshift database. I want SQL query to extract incremental data from a table. My table has columns like CreatedOn and LastUpdatedOn to track changes made in table.
CREATE TABLE [dbo].[Table](
[Id] [int] IDENTITY(1,1) NOT NULL,
[CreatedOn] [datetimeoffset](7) NOT NULL,
[CreatedBy] [nvarchar](255) NOT NULL,
[LastUpdatedBy] [nvarchar](255) NOT NULL,
[LastUpdatedOn] [datetimeoffset](7) NOT NULL,
[RowVersion] [timestamp] NOT NULL,
[Type] [nvarchar](128) NULL,
[Title] [nvarchar](256) NULL,
[FirstName] [nvarchar](256) NULL,
[MiddleName] [nvarchar](256) NULL,
[HasMiddleName] [bit] NULL,
[Surname] [nvarchar](256) NULL,
[DateOfBirth] [datetime] NULL,
[Gender] [nvarchar](16) NULL,
CONSTRAINT [PK_Table] PRIMARY KEY CLUSTERED
(
[Id] ASC
)
)
Below is the query for full load
SELECT
Id
,CONVERT(VARCHAR, CreatedOn, 20) AS CreatedOn
,CreatedBy
,LastUpdatedBy
,CONVERT(VARCHAR, LastUpdatedOn, 20) AS LastUpdatedOn
,Type
,Title
,TRIM(REPLACE(FirstName,CHAR(10),' ')) AS FirstName
,TRIM(REPLACE(MiddleName,CHAR(10),' ')) AS MiddleName
,HasMiddleName
,REPLACE(Surname,CHAR(10),' ') AS Surname
,TRIM(CONVERT(VARCHAR, DateOfBirth, 20)) AS DateOfBirth
,Gender
FROM
table
Get the execution time for last successful job run and filter it as shown below
SELECT
Id
,CONVERT(VARCHAR, CreatedOn, 20) AS CreatedOn
,CreatedBy
,LastUpdatedBy
,CONVERT(VARCHAR, LastUpdatedOn, 20) AS LastUpdatedOn
,Type
,Title
,TRIM(REPLACE(FirstName,CHAR(10),' ')) AS FirstName
,TRIM(REPLACE(MiddleName,CHAR(10),' ')) AS MiddleName
,HasMiddleName
,REPLACE(Surname,CHAR(10),' ') AS Surname
,TRIM(CONVERT(VARCHAR, DateOfBirth, 20)) AS DateOfBirth
,Gender
FROM table
WHERE CreatedOn > '<LastSuccessful Job Execution Time>'
OR LastUpdatedOn > '<LastSuccessful Job Execution Time>'