Search code examples
sasaggregateprocdatastepsubquery

it is possible to convert subselect with max function into SAS datastep?


it is possible to convert code below from proc sql into sas datastep?

proc sql;
create table CAL_CHECK as
select t1.DT_REP 
     , t1.BANK_FLAG
     , (select MAX(t2.DT_REP) as LAST_BD_BANK_FLAG from CZ_LOOK.CZ_CALENDAR t2 where t2.DT_REP < t1.DT_REP and t2.BANK_FLAG='Y') as LAST_BD 
from CZ_LOOK.CZ_CALENDAR t1
where dt_rep between &first_day. and &last_day.;
quit;

thanks for response

example:

DT_REP      LAST_BD
01.04.2020  31.03.2020
02.04.2020  01.04.2020
03.04.2020  02.04.2020
04.04.2020  03.04.2020
05.04.2020  03.04.2020
06.04.2020  03.04.2020
07.04.2020  06.04.2020
08.04.2020  07.04.2020

1.4. was Wednesday, so last banking day is 31.3,
5.4. was Sunday, so the last banking day is 3.4,
6.4. was Monday, so the last banking day is 3.4. to


Solution

  • Yes. The sub-select is a self correlated sub-selected.

    One might consider LAG as suitable, however, the bank_flag conditional makes it impossible to use LAG for the task at hand.

    Retaining last_bd and updating its value only after using its value from prior state will in essence lag the variable conditionally as needed.

    Example:

    data CZ_CALENDAR;
      input (DT_REP LAST_BD) (ddmmyy10.:) bank_flag: $1. ;
      format dt_rep last_bd yymmdd10.;
    datalines;
    31.03.2020  .           Y Tue
    01.04.2020  31.03.2020  Y Wed
    02.04.2020  01.04.2020  Y Thu
    03.04.2020  02.04.2020  Y Fri
    04.04.2020  03.04.2020  N Sat
    05.04.2020  03.04.2020  N Sun
    06.04.2020  03.04.2020  Y Mon
    07.04.2020  06.04.2020  Y Tue
    08.04.2020  07.04.2020  Y Wed
    ;
    
    %let first_day = '01JAN2020'D;
    %let last_day = "&SYSDATE."D;
    
    %put NOTE: &=first_day;
    %put NOTE: &=last_day;
    
    proc sql;
      create table CAL_CHECK as
      select
        t1.DT_REP 
        , t1.BANK_FLAG
        , ( select MAX(t2.DT_REP) as LAST_BD_BANK_FLAG 
           from CZ_CALENDAR t2 
           where t2.DT_REP < t1.DT_REP and t2.BANK_FLAG='Y'
          )
          as LAST_BD 
      from
        CZ_CALENDAR t1
      where
        dt_rep between &first_day. and &last_day.;
    quit;
    
    proc print data=CAL_CHECK;
      title "SQL";
      format dt_rep last_bd WEEKDATE.;
    run;
    
    proc sort data=cz_calendar;
      by dt_rep;
    
    data cal_check2(where=(dt_rep between &first_day. and &last_day.));
      set cz_calendar;
    
      OUTPUT;
    
      * save dt_rep from most recent bank_flag day;
      retain last_bd;
      if bank_flag = 'Y' then last_bd = dt_rep;
    run;
    
    proc print data=CAL_CHECK2;
      title "DATA step";
      format dt_rep last_bd WEEKDATE.;
    run;
    

    Output
    enter image description here