Search code examples
sqlmysqlleft-joinsummary

Group by and Left join


create table test_txn (
    ttid bigint(20) unsigned NOT NULL AUTO_INCREMENT,
    `PROFILEID` int(11) NOT NULL,
    `MERCHANT_TXN_ID` varchar(32) NOT NULL,
    `TXN_STATUS` int(4) NOT NULL,
    `TXN_TYPE` varchar(6) NOT NULL,
    `TXN_SUB_TYPE` varchar(50) DEFAULT NULL,
    PRIMARY KEY (`ttid`)
);

create table test_summ(
    sumid bigint(20) unsigned NOT NULL AUTO_INCREMENT, 
    `PROFILEID` int(11) NOT NULL,   
    `MERCHANT_TXN_ID` varchar(32) NOT NULL,   
    `TXN_STATUS` int(4) NOT NULL,   
    `TXN_TYPE` varchar(6) NOT NULL,   
    `TXN_SUB_TYPE` varchar(50) DEFAULT NULL, 
    cnt int(11) NOT NULL, 
    PRIMARY KEY (`sumid`)
);

test_summ is summary table for test_txn

insert into test_txn 
values (1,1,1,1,2,NULL), (2,1,1,1,2,NULL), (3,1,1,4,2,NULL), (4,2,1,4,2,NULL), (5,2,1,5,2,NULL);

insert into test_summ (
    SELECT NULL, PROFILEID, MERCHANT_TXN_ID, TXN_STATUS, TXN_TYPE, TXN_SUB_TYPE, count(*) cnt 
    from test_txn 
    group by  PROFILEID, MERCHANT_TXN_ID, TXN_STATUS, TXN_TYPE, TXN_SUB_TYPE
);

insert into test_txn 
values (6,2,1,30,2,NULL), (7,2,1,30,2,NULL);

I want to fetch records for which summrization is NOT done from test_txn to test_summ.

I wrote this query:

SELECT * 
from (
    select count(*) cnt,a.PROFILEID, a.MERCHANT_TXN_ID, a.TXN_STATUS, a.TXN_TYPE, a.TXN_SUB_TYPE 
    from test_txn a 
    group by  a.PROFILEID, a.MERCHANT_TXN_ID, a.TXN_STATUS, a.TXN_TYPE, a.TXN_SUB_TYPE
) b 
LEFT join test_summ c ON b.PROFILEID = c.PROFILEID and b.MERCHANT_TXN_ID = c.MERCHANT_TXN_ID and b.TXN_STATUS = c.TXN_STATUS and b.TXN_TYPE = c.TXN_TYPE and b.TXN_SUB_TYPE = c.TXN_SUB_TYPE and b.cnt = c.cnt 
WHERE c.PROFILEID IS NULL;

But it provides all rows from left table. It should ideally give rows where TXN_STATUS is 30. So what can be right query ?

I want to fetch results from transaction table, which are not summarized in summary table.


Solution

  • The problem is using b.TXN_SUB_TYPE = c.TXN_SUB_TYPE in the ON condition. Since these columns contain NULL, the condition will never be true because NULL = NULL is not true.

    You can use the null-safe equality operator <=> instead.

    SELECT * 
    from (
        select count(*) cnt,a.PROFILEID, a.MERCHANT_TXN_ID, a.TXN_STATUS, a.TXN_TYPE, a.TXN_SUB_TYPE 
        from test_txn a 
        group by  a.PROFILEID, a.MERCHANT_TXN_ID, a.TXN_STATUS, a.TXN_TYPE, a.TXN_SUB_TYPE
    ) b 
    left join test_summ c ON b.PROFILEID = c.PROFILEID and b.MERCHANT_TXN_ID = c.MERCHANT_TXN_ID and b.TXN_STATUS = c.TXN_STATUS and b.TXN_TYPE = c.TXN_TYPE and b.TXN_SUB_TYPE <=> c.TXN_SUB_TYPE and b.cnt = c.cnt 
    WHERE c.PROFILEID IS NULL;