Search code examples
sas

Customize the proc freq output


suppose to have the following table:

       Patient          weight_class       Region       Social_Class        Year
          1                 1                 A             0               2015
          2                 2                 A             1               2015
          3                 1                 A             0               2015
          4                 4                 B             0               2015
          5                 4                 C             2               2015
          6                 3                 C             2               2015
          7                 1                 C             2               2015

Is there a way to do the proc freq so that the output looks like this?

enter image description here

Finally, is there a way to write a macro/code in an "universal" way so that the format will be generated independently form the name of a specific variable? (To note: this is not fundamental with respect to the first question).

Thank you very much!


Solution

  • For just data set output you can follow Proc FREQ with Proc TRANSPOSE.

    proc sort data=have;
      by year region social_class weight_class;
    proc freq noprint data=have;
      by year region social_class;
      table weight_class / out=freqs;
    proc transpose data=freqs out=want(drop=_name_ _label_) prefix=weight_class_;
      by year region social_class;
      id weight_class;
      var count;
    run;
    

    The output has dependencies on appearance ordering of values identifier (id) and does not have rows for some combinations year, region, social_class.

    enter image description here

    The ordering can be adjusted by adding more data that forces the transposed column ordering. Addtionally, a key_combinations data set will need to be created to force the groups that have no counts.

    Forcing data

    /* Predefined (includes year 2016 not in data) */
    
    data years;   do year = 2015 to 2016; output; end; run;
    data regions; do region = 'A', 'B', 'C'; output; end; run;
    data socials; do social_class = 0 to 2; output; end; run;
    data weights; do weight_class = 1 to 4; output; end; run;
    
    proc sql;
      create table all_combinations as
      select year, region, social_class, weight_class 
      from years, regions, socials, weights
      ;
    

    Or perhaps, just the independent coverage of the by variables

    /* Assumed (contextual coverage) */
    
    proc sql;
      create table all_combinations as
      select year, region, social_class, weight_class 
      from (select distinct year from have)
         , (select distinct region from have)
         , (select distinct social_class from have)
         , (select distinct weight_class from have)
      ;
    

    Merge the forced data with the original data and setup a value for Proc FREQ WEIGHT.

    data have_all;
      merge have(in=have) all_combinations ;
      by year region social_class weight_class ;
      freq=have;
    run;
    
    proc freq noprint data=have_all;
      by year region social_class;
      table weight_class / out=freqs_all;
      weight freq / zeros;
    run;
    proc transpose data=freqs_all out=want(drop=_name_ _label_) prefix=weight_class_;
      by year region social_class;
      id weight_class;
      var count;
    run;
    

    enter image description here

    The wide (pivoted) style of data is often not overly useful for addtional analysis and it is better to leave the data in categorical form and instead generate a pivoted report using Proc TABULATE

    Plain tabulate (does not cover all combinations, only those present in the data)

    proc tabulate data=have;
      class region social_class year weight_class;
      table 
        year * region * social_class
      , weight_class * n
      ;
    run;
    

    enter image description here

    Tabulate with CLASSDATA=

    proc format;
      value na .='na';
    
    proc tabulate data=have classdata=all_combinations;
      class year region social_class weight_class;
      table 
        year * region * social_class
      , weight_class * n=' '*f=na.
      ;
    run;
    

    enter image description here