Search code examples
sassas-macro

Checking whether the DS has variable value if the variable has missing values then drop the column


am passing a DS in macro parameter with var= if its corresponding variable has same value but the variables has all missing values then drop it.

DATA details;
INPUT id name $ dept $ salary;
datalines;
01 John . 10000
02 Mary . 20000
03 Priya . 30000
05 Ram . 25000

;

DATA newdetails;
INPUT var_name $ var_core $;
DATALINES;
id perm
name perm
dept perm
salary req

;

      %macro core_check(inds=,newds=,var_core_val= );
      proc sql noprint;
      select var_name 
      into :varname separated by ' '
      from &newds
      where var_core="&var_core_val.";
      quit;

      %let nvar=&sqlobs;
      %put &varname;

     %do i=1 %to &nvar;
     %let var&i=%scan(&varname,&i);
     proc sql;
     select count(*)
     into :nobs
     from &inds where &&var&i is not missing ;
     %put this = &nobs;
     quit;
     %end;
     %mend;
     %core_check(inds=work.details,newds=work.newdetails,var_core_val=perm); 

Solution

  • Here is one way to check for empty variables using the NLEVELS output of PROC FREQ. Note that the ODS table might not create the NMissLevels or NNonMissLevels variables based on the results.

    So for your problem we could make a macro that takes as its input the name of the dataset to check, the dataset to create and the dataset with the metadata about which variables are optional. First get the list of variables to check. Then check them using PROC FREQ. Then use a data step to generate a macro variable with the list of empty variables.

    %macro drop_optional(inds=,newds=,metadata= );
      %local varlist n emptyvars ;
      proc sql noprint;
        select var_name 
         into :varlist separated by ' '
         from &metadata
         where var_core='perm'
        ;
      quit;
      %let n=&sqlobs;
      %if (&n) %then %do;
        ods output nlevels=nlevels;
        proc freq nlevels data=&inds ;
          tables &varlist / noprint ;
        run;
        data nlevels;
          length TableVar $32 NLevels NMissLevels NNonMissLevels 8;
          set nlevels end=eof;
          nmisslevels+0;
          nnonmisslevels=nlevels-nmisslevels;
          length emptyvars $32767;
          retain emptyvars;
          if nnonmisslevels=0 then emptyvars=catx(' ',emptyvars,tablevar);
          if eof then call symputx('emptyvars',emptyvars);
        run;
      %end;
      data &newds;
        set &inds (drop=&emptyvars);
      run;
    %mend drop_optional;
    

    So let's use your sample data.

    data details;
      input id name $ dept $ salary;
    datalines;
    01 John . 10000
    02 Mary . 20000
    03 Priya . 30000
    05 Ram . 25000
    ;
    
    data metadata;
      input var_name $ var_core $;
    DATALINES;
    id perm
    name perm
    dept perm
    salary req
    ;
    

    And call the macro.

    %drop_optional(inds=details,newds=details_new,metadata=metadata);