I have a query (bottom) I run against this table of hourly data that looks for missing records -- where I should "gaps" whenever the time between sequential records is greater than 3600 seconds (or 1 hour) from a record to the next temporally based on Intervaldatetime
. It used to perform well enough but is now taking many minutes to run. Any thoughts on how I can speed this up? It's about 100,000 records running on Maria DB 10.6 (Ubuntu 22.04).
CREATE TABLE `electric_use` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`Intervaldatetime` timestamp(6) NULL DEFAULT NULL,
`kwh` double(11,4) DEFAULT NULL,
`ts` timestamp NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
`notes` varchar(250) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `uniq_dt` (`Intervaldatetime`,`kwh`),
KEY `date_index` (`Intervaldatetime`)
insert into `electric_use` (`id`, `Intervaldatetime`, `kwh`, `ts`, `notes`) values('2000','2014-04-25 13:00:00.000000','1.4730',NULL,NULL);
insert into `electric_use` (`id`, `Intervaldatetime`, `kwh`, `ts`, `notes`) values('2001','2014-04-25 14:00:00.000000','1.6610',NULL,NULL);
insert into `electric_use` (`id`, `Intervaldatetime`, `kwh`, `ts`, `notes`) values('2002','2014-04-25 15:00:00.000000','1.5140',NULL,NULL);
insert into `electric_use` (`id`, `Intervaldatetime`, `kwh`, `ts`, `notes`) values('2003','2014-04-25 16:00:00.000000','1.7070',NULL,NULL);
insert into `electric_use` (`id`, `Intervaldatetime`, `kwh`, `ts`, `notes`) values('2004','2014-04-25 17:00:00.000000','1.7210',NULL,NULL);
insert into `electric_use` (`id`, `Intervaldatetime`, `kwh`, `ts`, `notes`) values('2005','2014-04-25 18:00:00.000000','1.7650',NULL,NULL);
insert into `electric_use` (`id`, `Intervaldatetime`, `kwh`, `ts`, `notes`) values('2006','2014-04-25 19:00:00.000000','2.6490',NULL,NULL);
insert into `electric_use` (`id`, `Intervaldatetime`, `kwh`, `ts`, `notes`) values('2007','2014-04-25 20:00:00.000000','1.6050',NULL,NULL);
insert into `electric_use` (`id`, `Intervaldatetime`, `kwh`, `ts`, `notes`) values('2009','2014-04-25 22:00:00.000000','1.5970',NULL,NULL);
insert into `electric_use` (`id`, `Intervaldatetime`, `kwh`, `ts`, `notes`) values('2010','2014-04-25 23:00:00.000000','2.2840',NULL,NULL);
and here is the query.
SELECT
`t`.`Intervaldatetime` AS `GapStart`,
`t`.`NextDateTime` AS `GapEnd`,
TIMESTAMPDIFF(SECOND,`t`.`Intervaldatetime`,`t`.`NextDateTime`) AS `SizeInSecond`
FROM (SELECT
`t1`.`Intervaldatetime` AS `Intervaldatetime`,
(SELECT
`t2`.`Intervaldatetime`
FROM `electric_use` `t2`
WHERE `t2`.`Intervaldatetime` > `t1`.`Intervaldatetime`
LIMIT 1) AS `NextDateTime`
FROM `electric_use` `t1`) `t`
WHERE TIMESTAMPDIFF(SECOND,`t`.`Intervaldatetime`,`t`.`NextDateTime`) > 3600)
You should use LEAD instead of a subselect to find the next time. This should be much more efficient.
SELECT
`t`.`Intervaldatetime` AS `GapStart`,
`t`.`NextDateTime` AS `GapEnd`,
TIMESTAMPDIFF(SECOND,`t`.`Intervaldatetime`,`t`.`NextDateTime`) AS `SizeInSecond`
FROM (
SELECT
Intervaldatetime,
LEAD(Intervaldatetime) OVER (ORDER BY Intervaldatetime) NextDateTime
FROM electric_use
) t
WHERE TIMESTAMPDIFF(SECOND,`t`.`Intervaldatetime`,`t`.`NextDateTime`) > 3600