Search code examples
sqlsassas-macro

How to get the second max and third max records of date column in sas


I want to get the max, second max and third max date of below table

proc SQL;

create table Kri_obs_l(
  Kri_rk int,
  value_dt datetime,
  score int
);

insert into Kri_obs_l
values (10000, "2Jan2018"d, 12)
values (10001, "2Jan2018"d, 2)
values (10001, "29Jan2019"d, 18)
values (10002, "2Jan2018"d, 5)
values (10002, "26Jan2019"d, 18)
values (10002, "27Jan2019"d, 18)
values (10003, "2Jan2018"d, 18)
values (10003, "26Jan2019"d, 18)
values (10003, "27Jan2019"d, 18)
values (10004, "2Jan2018"d, 25)
values (10005, "2Jan2018"d, 32)
values (10005, "26Jan2019"d, 18)
values (10005, "27Jan2019"d, 18)
values (10006, "2Jan2018"d, 5)
values (10007, "2Jan2018"d, 2)
values (10008, "2Jan2018"d, 12)
values (10009, "2Jan2018"d, 18)
values (10009, "26Jan2019"d, 18)
values (10009, "27Jan2019"d, 18)
;
quit;

My Code to get the max, second max, and third max

proc sql;
create table KRI_score_max as
select Kri_rk, MAX(value_dt) as reported_dt format=date9.
from KRI_OBS_l
group by Kri_rk;

create table KRI_score_second_max as
select Kri_rk, MAX(value_dt) as reported_dt format=date9.
from KRI_OBS_l a
where value_dt = (select MAX(value_dt) from KRI_OBS_l b where value_dt< (select MAX(value_dt) from KRI_OBS_l))
group by Kri_rk;

create table KRI_score_third_max as
select Kri_rk, MAX(value_dt) as reported_dt format=date9.
from KRI_OBS_l
where value_dt < (select MAX(value_dt) from KRI_OBS_l where value_dt< (select MAX(value_dt) from KRI_OBS_l))
group by Kri_rk;

create table KRI_score_third_max as
select * 
from KRI_score_third_max
where KRI_rk in (select Kri_rk from KRI_score_second_max);

quit;

getting the max record is working fine, I'm having the issue in getting the second max and third max record if I remove the 29Jan2019 record from the table it works fine, but adding that record makes 27Jan2019 the second max record so query gets this record as the second max. I want to get the second max and third max record of every record group by Kri_rk.

Since there is not any limit function in proc SQL, I can't use order by desc and limit combination.


Solution

  • Simple proc rank should work

    proc rank data=Kri_obs_l out=inter descending ties=dense;
       by  Kri_rk;
      var value_dt ;
      ranks value_dt_1; 
     run; 
    
    data final(drop=value_dt_1);
      set inter;
     if value_dt_1 le 3;
     run;
    

    Assuming if you do not have duplicates in dates per id then below queries should work too

      proc sort data =Kri_obs_l out =Kri_obs_l_1;
         by  Kri_rk descending value_dt;
       run;
    
      data want;
        set Kri_obs_l_1;
      by  Kri_rk descending value_dt;
     if first.Kri_rk then m=1;
     else m+1;
       if m le 3;
     drop m;
       run;
    
    
       proc sql;
      create table want(drop=cnt) as 
         select Kri_rk, value_dt, score,
           (select count(value_dt) from Kri_obs_l a
        where a.Kri_rk = b.Kri_rk
       and a.value_dt ge b.value_dt
       group by Kri_rk) as cnt
       from Kri_obs_l b 
       where calculated cnt le 3
       ;