I do have an application based on the Activiti engine in version 5.22. Currently, it produces in a given time frame more history information than the history cleanup process is able to delete in the same time frame. Is there any idea how to improve this behavior?
The Activity framework creates history data in the following tables:
ACT_HI_PROCINST
ACT_HI_ACTINST
ACT_HI_TASKINST
ACT_GE_BYTEARRAY
ACT_HI_VARINST
ACT_HI_DETAIL
ACT_HI_COMMENT
ACT_HI_ATTACHMENT
ACT_HI_IDENTITYLINK
The default cleanup process fetches the ACT_HI_PROCINST
and then tries to delete them, along with the other history tables, that are linked to a given ACT_HI_PROCINST
. The delete process doesn't use batching or bulk deletes, hence it's very slow.
One way to speed up the cleanup process is to execute a Stored Procedure, like the following one:
CREATE PROCEDURE usp_DeleteActivityHistory(
@BeforeStartTimestamp DATETIME,
@BatchSize INT,
@DeletedRowCount INT OUTPUT
)
AS
BEGIN
DROP TABLE IF EXISTS #ROOT_PROC_INST_ID_TABLE;
CREATE TABLE #ROOT_PROC_INST_ID_TABLE (PROC_INST_ID_ NVARCHAR(64));
DROP TABLE IF EXISTS #PROC_INST_ID_TABLE;
CREATE TABLE #PROC_INST_ID_TABLE (PROC_INST_ID_ NVARCHAR(64));
DROP TABLE IF EXISTS #TASK_INST_ID_TABLE;
CREATE TABLE #TASK_INST_ID_TABLE (ID_ NVARCHAR(64));
INSERT INTO #ROOT_PROC_INST_ID_TABLE
SELECT TOP (@BatchSize) PROC_INST_ID_
FROM ACT_HI_PROCINST
WHERE
END_TIME_ <= @BeforeStartTimestamp
AND END_TIME_ IS NOT NULL
AND SUPER_PROCESS_INSTANCE_ID_ IS NULL;
SET @DeletedRowCount=0;
DECLARE @DeletedBatchRowCount INT;
WHILE (SELECT COUNT(*) FROM #ROOT_PROC_INST_ID_TABLE) > 0
BEGIN
TRUNCATE TABLE #PROC_INST_ID_TABLE;
TRUNCATE TABLE #TASK_INST_ID_TABLE;
SET @DeletedBatchRowCount=0;
WITH ACT_HI_PROCINST_HIERARCHY(PROC_INST_ID_)
AS (
SELECT PROC_INST_ID_
FROM #ROOT_PROC_INST_ID_TABLE
UNION ALL
SELECT ACT_HI_PROCINST.PROC_INST_ID_
FROM ACT_HI_PROCINST
INNER JOIN ACT_HI_PROCINST_HIERARCHY ON ACT_HI_PROCINST_HIERARCHY.PROC_INST_ID_ = ACT_HI_PROCINST.SUPER_PROCESS_INSTANCE_ID_
)
INSERT INTO #PROC_INST_ID_TABLE
SELECT PROC_INST_ID_
FROM ACT_HI_PROCINST_HIERARCHY;
BEGIN TRY
BEGIN TRANSACTION;
DELETE FROM ACT_GE_BYTEARRAY
WHERE ID_ IN (
SELECT BYTEARRAY_ID_ FROM ACT_HI_DETAIL
WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE)
);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
DELETE FROM ACT_HI_DETAIL
WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
DELETE FROM ACT_GE_BYTEARRAY
WHERE ID_ IN (
SELECT BYTEARRAY_ID_ FROM ACT_HI_VARINST
WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE)
);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
DELETE FROM ACT_HI_VARINST
WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
DELETE FROM ACT_HI_ACTINST
WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
-- Delete ACT_HI_TASKINST rows recursive along with their associated:
-- ACT_HI_DETAIL, ACT_HI_VARINST, ACT_HI_COMMENT, ACT_HI_ATTACHMENT, ACT_HI_IDENTITYLINK
BEGIN
WITH ACT_HI_TASKINST_HIERARCHY(ID_)
AS (
SELECT ID_
FROM ACT_HI_TASKINST
WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE)
UNION ALL
SELECT ACT_HI_TASKINST.ID_
FROM ACT_HI_TASKINST
INNER JOIN ACT_HI_TASKINST_HIERARCHY ON ACT_HI_TASKINST_HIERARCHY.ID_ = ACT_HI_TASKINST.PARENT_TASK_ID_
)
INSERT INTO #TASK_INST_ID_TABLE
SELECT ID_
FROM ACT_HI_TASKINST_HIERARCHY;
DELETE FROM ACT_GE_BYTEARRAY
WHERE ID_ IN (
SELECT BYTEARRAY_ID_ FROM ACT_HI_DETAIL
WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE)
);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
DELETE FROM ACT_HI_DETAIL
WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
DELETE FROM ACT_GE_BYTEARRAY
WHERE ID_ IN (
SELECT BYTEARRAY_ID_ FROM ACT_HI_VARINST
WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE)
);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
DELETE FROM ACT_HI_VARINST
WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
DELETE FROM ACT_HI_COMMENT
WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
DELETE FROM ACT_GE_BYTEARRAY
WHERE ID_ IN (
SELECT CONTENT_ID_ FROM ACT_HI_ATTACHMENT
WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE)
);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
DELETE FROM ACT_HI_ATTACHMENT
WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
DELETE FROM ACT_HI_IDENTITYLINK
WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
DELETE FROM ACT_HI_TASKINST
WHERE ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
END;
DELETE FROM ACT_HI_IDENTITYLINK
WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
DELETE FROM ACT_HI_COMMENT
WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
DELETE FROM ACT_HI_PROCINST
WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE);
SET @DeletedBatchRowCount+=@@ROWCOUNT;
COMMIT TRANSACTION;
SET @DeletedRowCount+=@DeletedBatchRowCount;
END TRY
BEGIN CATCH
IF (XACT_STATE()) = -1
-- The current transaction cannot be committed.
BEGIN
PRINT
N'The transaction cannot be committed. Rolling back transaction.'
ROLLBACK TRANSACTION;
END;
ELSE
IF (XACT_STATE()) = 1
-- The current transaction can be committed.
BEGIN
PRINT
N'Exception was caught, but the trasaction can be committed.'
COMMIT TRANSACTION;
END;
END CATCH;
TRUNCATE TABLE #ROOT_PROC_INST_ID_TABLE;
INSERT INTO #ROOT_PROC_INST_ID_TABLE
SELECT TOP (@BatchSize) PROC_INST_ID_
FROM ACT_HI_PROCINST
WHERE
END_TIME_ <= @BeforeStartTimestamp
AND END_TIME_ IS NOT NULL
AND SUPER_PROCESS_INSTANCE_ID_ IS NULL;
END
DROP TABLE IF EXISTS #ROOT_PROC_INST_ID_TABLE;
DROP TABLE IF EXISTS #PROC_INST_ID_TABLE;
DROP TABLE IF EXISTS #TASK_INST_ID_TABLE;
END
This stored procedure has the following advantages:
You can execute this Stroed Procedure using JDBC, like this:
public int deleteActivityHistoryBeforeDate(
Connection connection,
Timestamp olderThanTimestamp,
int batchSize) {
long startNanos = System.nanoTime();
try (CallableStatement sp = connection.prepareCall(
"{ call usp_DeleteActivityHistory(?, ?, ?) }")
) {
sp.setTimestamp(1, olderThanTimestamp);
sp.setInt(2, batchSize);
sp.registerOutParameter("DeletedRowCount", Types.INTEGER);
sp.execute();
int rowCount = sp.getInt("DeletedRowCount");
LOGGER.info(
"Deleted {} records in {} milliseconds",
rowCount,
TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos)
);
return rowCount;
} catch (SQLException e) {
LOGGER.error("The usp_DeleteActivityHistory execution failed", e);
return 0;
}
}
The connection
parameter is the JDBC Connection
the procedure will execute on.
The olderThanTimestamp
is a Timestamp
object that tells what's the maximum timestamp value for the END_TIME_
column in ACT_HI_PROCINST
table up to which the history records are deleted.
The batchSize
parameter tells how many root ACT_HI_PROCINST
records are deleted in one transaction. After executing a batch, the process proceeds to the next one until no root ACT_HI_PROCINST
row, older than the olderThanTimestamp
value, is present in the database.
The return value contains the number of table records across all tables that were deleted by executing the stored procedure with the provided parameter values.
If you want to see how the Stored Procedure works in practice, check out this test case on GitHub.