Search code examples
mysqlbulkinsertbulk

Insert data from one table to another table in chunks - MySQL


I have been trying to load data from one table that has 100 million + rows into a new table based on a filter. I have tried to load using the procedure below but for some reason it just sits there with no result or activity in processlist.

I have been trying to insert data based on updated date but end up either inserting the same data each time running into a PK violation or no update.

Please advise what's the best way to load this data into a new table. Thanks

USE `mydb`;
DROP procedure IF EXISTS `insert_data`;

DELIMITER $$
USE `ods`$$
CREATE PROCEDURE `insert_data` ()
BEGIN
DECLARE i INT DEFAULT 0;
DECLARE limitSize INT DEFAULT 2000;
DECLARE maxupdate datetime;

SET maxupdate = (SELECT MAX(updated) FROM test_data);

WHILE i <= maxId DO
START TRANSACTION;
INSERT INTO test_arc
SELECT * FROM test_data 
WHERE check_time>='2022-02-05' and updated> i
ORDER BY updated ASC
LIMIT limitSize;
COMMIT;
SET i = i + limitSize;
END WHILE;
END$$
DELIMITER;

--My Tables:
--Actual Table
 CREATE TABLE test_data (
 UNIQUE_ID bigint(11) NOT NULL,
 DEVICE_NAME varchar(128) NOT NULL DEFAULT '',
 PARAMETER_NAME varchar(256) DEFAULT NULL,
 GMT_OFFSET varchar(6) DEFAULT NULL,
 CHECK_TIME datetime NOT NULL DEFAULT '1970-01-01 00:00:00',
 OUTPUT_DATA text NOT NULL,
 PERF_DATA text NOT NULL,
 PRIMARY KEY (UNIQUE_ID)
 );

 --Archive Table
CREATE TABLE test_arc (
UNIQUE_ID bigint(11) NOT NULL,
DEVICE_NAME varchar(128) NOT NULL DEFAULT '',
PARAMETER_NAME varchar(256) DEFAULT NULL,
GMT_OFFSET varchar(6) DEFAULT NULL,
CHECK_TIME datetime NOT NULL DEFAULT '1970-01-01 00:00:00',
OUTPUT_DATA text NOT NULL,
PERF_DATA text NOT NULL,
PRIMARY KEY (UNIQUE_ID)

);


Solution

  • Transaction you don't really that is why i removed it, the rest ist controlled by LIMIT and OFFSET which you need to do such a partial updates in block

    CREATE TABLE test_arc (id int,check_time datetime, updated datetime)
    
    CREATE TABLE test_data (id int,check_time datetime, updated Datetime)
    
    INSERT INTO test_data VALUES (1,NOW() - INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(2,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY)
    ,(3,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(4,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY)
    ,(5,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),
    (6,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(7,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY)
    ,(8,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY)
    ,(9,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY),(10,NOW()- INTERVAL 1 DAY,NOW()- INTERVAL 1 DAY)
    
    SELECT * FROM test_data 
    
    id | check_time          | updated            
    -: | :------------------ | :------------------
     1 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
     2 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
     3 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
     4 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
     5 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
     6 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
     7 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
     8 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
     9 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
    10 | 2022-02-12 18:27:12 | 2022-02-12 18:27:12
    
    CREATE PROCEDURE `insert_data` ()
    BEGIN
    DECLARE i BIGINT DEFAULT 0;
    DECLARE limitSize INT DEFAULT 5;
    DECLARE maxnumber BIGINT;
    
    SET maxnumber = (SELECT COUNT(*) FROM test_data) + 1;
    
    WHILE i <= maxnumber DO
         INSERT INTO test_arc
             SELECT id ,check_time, NOW() FROM test_data 
             WHERE check_time>='2022-02-05'
             ORDER BY updated ASC
             LIMIT i, limitSize
         ON DUPLICATE KEY UPDATE id =VALUES(id), check_time=VALUES(check_time),updated  = Values(updated );   SET i = i + limitSize;
    END WHILE;
    END
    
    CALL insert_data();
    
    SELECT * FROM test_arc
    
    id | check_time          | updated            
    -: | :------------------ | :------------------
     1 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
     2 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
     3 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
     4 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
     5 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
     6 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
     7 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
     8 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
     9 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
    10 | 2022-02-12 18:27:12 | 2022-02-13 18:27:12
    

    db<>fiddle here