Tables:- schema.INFA_TASK_RUN_STG schema.INFA_TASK_RUN
Primary Index of schema.INFA_TASK_RUN_STG :- SUBJECT_AREA Primary Index of schema.INFA_TASK_RUN :- SUBJECT_ID ,WORKFLOW_ID ,WORKFLOW_RUN_ID ,WORKLET_RUN_ID , INSTANCE_ID ,TASK_ID ,START_TIME
Merge statement in Teradata:-
MERGE INTO schema.INFA_TASK_RUN USING schema.INFA_TASK_RUN_STG src
ON
INFA_TASK_RUN_RAW.SUBJECT_ID = src.SUBJECT_ID
AND INFA_TASK_RUN_RAW.WORKFLOW_ID = src.WORKFLOW_ID
AND INFA_TASK_RUN_RAW.WORKFLOW_RUN_ID = src.WORKFLOW_RUN_ID
AND INFA_TASK_RUN_RAW.WORKLET_RUN_ID = src.WORKLET_RUN_ID
AND INFA_TASK_RUN_RAW.INSTANCE_ID = src.INSTANCE_ID
AND INFA_TASK_RUN_RAW.TASK_ID = src.TASK_ID
AND INFA_TASK_RUN_RAW.START_TIME = src.START_TIME
WHEN MATCHED THEN UPDATE SET
END_TIME = src.END_TIME
, RUN_ERR_CODE = src.RUN_ERR_CODE
, RUN_ERR_MSG = src.RUN_ERR_MSG
, RUN_STATUS_CODE = src.RUN_STATUS_CODE
WHEN NOT MATCHED THEN INSERT(
SUBJECT_AREA
, WORKFLOW_NAME
, VERSION_NUMBER
, SUBJECT_ID
, WORKFLOW_ID
, WORKFLOW_RUN_ID
, WORKLET_RUN_ID
, CHILD_RUN_ID
, INSTANCE_ID
, INSTANCE_NAME
, TASK_ID
, TASK_TYPE_NAME
, TASK_TYPE
, START_TIME
, END_TIME
, RUN_ERR_CODE
, RUN_ERR_MSG
, RUN_STATUS_CODE
, TASK_NAME
, TASK_VERSION_NUMBER
, SERVER_ID
, SERVER_NAME
)VALUES(
src.SUBJECT_AREA
, src.WORKFLOW_NAME
, src.VERSION_NUMBER
, src.SUBJECT_ID
, src.WORKFLOW_ID
, src.WORKFLOW_RUN_ID
, src.WORKLET_RUN_ID
, src.CHILD_RUN_ID
, src.INSTANCE_ID
, src.INSTANCE_NAME
, src.TASK_ID
, src.TASK_TYPE_NAME
, src.TASK_TYPE
, src.START_TIME
, src.END_TIME
, src.RUN_ERR_CODE
, src.RUN_ERR_MSG
, src.RUN_STATUS_CODE
, src.TASK_NAME
, src.TASK_VERSION_NUMBER
, src.SERVER_ID
, src.SERVER_NAME
);
As I Know MYSQL DB doesn't support Merge statement. I am trying update and insert statement. but it doesn't seems to be right.
UPDATE schema.INFA_TASK_RUN tgt INNER JOIN schema.INFA_TASK_RUN_STG src
ON
tgt.SUBJECT_ID = src.SUBJECT_ID
AND tgt.WORKFLOW_ID = src.WORKFLOW_ID
AND tgt.WORKFLOW_RUN_ID = src.WORKFLOW_RUN_ID
AND tgt.WORKLET_RUN_ID = src.WORKLET_RUN_ID
AND tgt.INSTANCE_ID = src.INSTANCE_ID
AND tgt.TASK_ID = src.TASK_ID
AND tgt.START_TIME = src.START_TIME
SET
tgt.END_TIME = src.END_TIME
, tgt.RUN_ERR_CODE = src.RUN_ERR_CODE
, tgt.RUN_ERR_MSG = src.RUN_ERR_MSG
, tgt.RUN_STATUS_CODE = src.RUN_STATUS_CODE;
insert into schema.INFA_TASK_RUN (SUBJECT_AREA , WORKFLOW_NAME , VERSION_NUMBER , SUBJECT_ID , WORKFLOW_ID , WORKFLOW_RUN_ID , WORKLET_RUN_ID , CHILD_RUN_ID , INSTANCE_ID , INSTANCE_NAME , TASK_ID , TASK_TYPE_NAME , TASK_TYPE , START_TIME , END_TIME , RUN_ERR_CODE , RUN_ERR_MSG , RUN_STATUS_CODE , TASK_NAME , TASK_VERSION_NUMBER , SERVER_ID , SERVER_NAME)
select src.SUBJECT_AREA , src.WORKFLOW_NAME , src.VERSION_NUMBER , src.SUBJECT_ID , src.WORKFLOW_ID , src.WORKFLOW_RUN_ID , src.WORKLET_RUN_ID , src.CHILD_RUN_ID , src.INSTANCE_ID , src.INSTANCE_NAME , src.TASK_ID , src.TASK_TYPE_NAME , src.TASK_TYPE , src.START_TIME , src.END_TIME , src.RUN_ERR_CODE , src.RUN_ERR_MSG , src.RUN_STATUS_CODE , src.TASK_NAME , src.TASK_VERSION_NUMBER , src.SERVER_ID , src.SERVER_NAME
from schema.INFA_TASK_RUN_STG as src
left outer join schema.INFA_TASK_RUN as tgt ON
tgt.SUBJECT_ID != src.SUBJECT_ID
AND tgt.WORKFLOW_ID != src.WORKFLOW_ID
AND tgt.WORKFLOW_RUN_ID != src.WORKFLOW_RUN_ID
AND tgt.WORKLET_RUN_ID != src.WORKLET_RUN_ID
AND tgt.INSTANCE_ID != src.INSTANCE_ID
AND tgt.TASK_ID != src.TASK_ID
AND tgt.START_TIME != src.START_TIME
Believe what you are looking for is something like this (Not tested and will only work when the primary key is set up correctly):
INSERT INTO schema.INFA_TASK_RUN (
SUBJECT_AREA
,WORKFLOW_NAME
,VERSION_NUMBER
,SUBJECT_ID
,WORKFLOW_ID
,WORKFLOW_RUN_ID
,WORKLET_RUN_ID
,CHILD_RUN_ID
,INSTANCE_ID
,INSTANCE_NAME
,TASK_ID
,TASK_TYPE_NAME
,TASK_TYPE
,START_TIME
,END_TIME
,RUN_ERR_CODE
,RUN_ERR_MSG
,RUN_STATUS_CODE
,TASK_NAME
,TASK_VERSION_NUMBER
,SERVER_ID
,SERVER_NAME
)
SELECT
SUBJECT_AREA
,WORKFLOW_NAME
,VERSION_NUMBER
,SUBJECT_ID
,WORKFLOW_ID
,WORKFLOW_RUN_ID
,WORKLET_RUN_ID
,CHILD_RUN_ID
,INSTANCE_ID
,INSTANCE_NAME
,TASK_ID
,TASK_TYPE_NAME
,TASK_TYPE
,START_TIME
,END_TIME
,RUN_ERR_CODE
,RUN_ERR_MSG
,RUN_STATUS_CODE
,TASK_NAME
,TASK_VERSION_NUMBER
,SERVER_ID
,SERVER_NAME
FROM schema.INFA_TASK_RUN_STG src
ON DUPLICATE KEY UPDATE
END_TIME = src.END_TIME
,RUN_ERR_CODE = src.RUN_ERR_CODE
,RUN_ERR_MSG = src.RUN_ERR_MSG
,RUN_STATUS_CODE = src.RUN_STATUS_CODE;
Edit on 2020-05-21 to show separate update and insert statements based on the comments:
The INSERT ... ON DUPLICATE KEY statement will probably be faster.
From the comments I tested the original statements to do the insert and update from the question.
Note your update statement works correctly. Only issue is that every row is updated even if there were no changes.
Could be possible to add conditions to the join like tgt.END_TIME != src.END_TIME to ensure only changed records are updated.
Original update query in your question:
UPDATE schema.INFA_TASK_RUN tgt INNER JOIN schema.INFA_TASK_RUN_STG src
ON
tgt.SUBJECT_ID = src.SUBJECT_ID
AND tgt.WORKFLOW_ID = src.WORKFLOW_ID
AND tgt.WORKFLOW_RUN_ID = src.WORKFLOW_RUN_ID
AND tgt.WORKLET_RUN_ID = src.WORKLET_RUN_ID
AND tgt.INSTANCE_ID = src.INSTANCE_ID
AND tgt.TASK_ID = src.TASK_ID
AND tgt.START_TIME = src.START_TIME
SET
tgt.END_TIME = src.END_TIME
, tgt.RUN_ERR_CODE = src.RUN_ERR_CODE
, tgt.RUN_ERR_MSG = src.RUN_ERR_MSG
, tgt.RUN_STATUS_CODE = src.RUN_STATUS_CODE;
Updated insert:
Had to change the insert statement, please see the JOIN is where the columns are equal and we only select where there are no matching value in the target table with the check that the target table column is NULL:
INSERT INTO schema.INFA_TASK_RUN (
SUBJECT_AREA
,WORKFLOW_NAME
,VERSION_NUMBER
,SUBJECT_ID
,WORKFLOW_ID
,WORKFLOW_RUN_ID
,WORKLET_RUN_ID
,CHILD_RUN_ID
,INSTANCE_ID
,INSTANCE_NAME
,TASK_ID
,TASK_TYPE_NAME
,TASK_TYPE
,START_TIME
,END_TIME
,RUN_ERR_CODE
,RUN_ERR_MSG
,RUN_STATUS_CODE
,TASK_NAME
,TASK_VERSION_NUMBER
,SERVER_ID
,SERVER_NAME
)
select src.SUBJECT_AREA
,src.WORKFLOW_NAME
,src.VERSION_NUMBER
,src.SUBJECT_ID
,src.WORKFLOW_ID
,src.WORKFLOW_RUN_ID
,src.WORKLET_RUN_ID
,src.CHILD_RUN_ID
,src.INSTANCE_ID
,src.INSTANCE_NAME
,src.TASK_ID
,src.TASK_TYPE_NAME
,src.TASK_TYPE
,src.START_TIME
,src.END_TIME
,src.RUN_ERR_CODE
,src.RUN_ERR_MSG
,src.RUN_STATUS_CODE
,src.TASK_NAME
,src.TASK_VERSION_NUMBER
,src.SERVER_ID
,src.SERVER_NAME
FROM schema.INFA_TASK_RUN as tgt
RIGHT JOIN schema.INFA_TASK_RUN_STG as src ON
tgt.SUBJECT_ID = src.SUBJECT_ID
AND tgt.WORKFLOW_ID = src.WORKFLOW_ID
AND tgt.WORKFLOW_RUN_ID = src.WORKFLOW_RUN_ID
AND tgt.WORKLET_RUN_ID = src.WORKLET_RUN_ID
AND tgt.INSTANCE_ID = src.INSTANCE_ID
AND tgt.TASK_ID = src.TASK_ID
AND tgt.START_TIME = src.START_TIME
WHERE tgt.SUBJECT_ID IS NULL;