I have been trying to load data from one table that has 100 million + rows into a new table based on a filter. I have tried to load using the procedure below but for some reason it just sits there with no result or activity in processlist.
I have been trying to insert data based on updated date but end up either inserting the same data each time running into a PK violation or no update.
Please advise what's the best way to load this data into a new table. Thanks
USE `mydb`;
DROP procedure IF EXISTS `insert_data`;
DELIMITER $$
USE `ods`$$
CREATE PROCEDURE `insert_data` ()
BEGIN
DECLARE i INT DEFAULT 0;
DECLARE limitSize INT DEFAULT 2000;
DECLARE maxupdate datetime;
SET maxupdate = (SELECT MAX(updated) FROM test_data);
WHILE i <= maxId DO
START TRANSACTION;
INSERT INTO test_arc
SELECT * FROM test_data
WHERE check_time>='2022-02-05' and updated> i
ORDER BY updated ASC
LIMIT limitSize;
COMMIT;
SET i = i + limitSize;
END WHILE;
END$$
DELIMITER;
--My Tables:
--Actual Table
CREATE TABLE test_data (
UNIQUE_ID bigint(11) NOT NULL,
DEVICE_NAME varchar(128) NOT NULL DEFAULT '',
PARAMETER_NAME varchar(256) DEFAULT NULL,
GMT_OFFSET varchar(6) DEFAULT NULL,
CHECK_TIME datetime NOT NULL DEFAULT '1970-01-01 00:00:00',
OUTPUT_DATA text NOT NULL,
PERF_DATA text NOT NULL,
PRIMARY KEY (UNIQUE_ID)
);
--Archive Table
CREATE TABLE test_arc (
UNIQUE_ID bigint(11) NOT NULL,
DEVICE_NAME varchar(128) NOT NULL DEFAULT '',
PARAMETER_NAME varchar(256) DEFAULT NULL,
GMT_OFFSET varchar(6) DEFAULT NULL,
CHECK_TIME datetime NOT NULL DEFAULT '1970-01-01 00:00:00',
OUTPUT_DATA text NOT NULL,
PERF_DATA text NOT NULL,
PRIMARY KEY (UNIQUE_ID)
);
Transaction you don't really that is why i removed it, the rest ist controlled by LIMIT and OFFSET which you need to do such a partial updates in block
CREATE TABLE test_arc (id int,check_time datetime, updated datetime)
CREATE TABLE test_data (id int,check_time datetime, updated Datetime)
INSERT INTO test_data VALUES (1,NOW() - INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(2,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY) ,(3,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(4,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY) ,(5,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY), (6,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(7,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY) ,(8,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY) ,(9,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(10,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY)
SELECT * FROM test_data
id | check_time | updated -: | :------------------ | :------------------ 1 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 2 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 3 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 4 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 5 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 6 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 7 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 8 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 9 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12 10 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
CREATE PROCEDURE `insert_data` () BEGIN DECLARE i BIGINT DEFAULT 0; DECLARE limitSize INT DEFAULT 5; DECLARE maxnumber BIGINT; SET maxnumber = (SELECT COUNT(*) FROM test_data) + 1; WHILE i <= maxnumber DO INSERT INTO test_arc SELECT id ,check_time, NOW() FROM test_data WHERE check_time>='2022-02-05' ORDER BY updated ASC LIMIT i, limitSize ON DUPLICATE KEY UPDATE id =VALUES(id), check_time=VALUES(check_time),updated = Values(updated ); SET i = i + limitSize; END WHILE; END
✓
CALL insert_data();
SELECT * FROM test_arc
id | check_time | updated -: | :------------------ | :------------------ 1 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 2 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 3 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 4 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 5 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 6 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 7 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 8 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 9 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12 10 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
db<>fiddle here