Search code examples
sascombinationsmodeling

This SAS code performs 10C9 for both means and CVs for width and length variables, is there a way to do 10C8, down to 10C1 in the same SAS code?


This SAS code performs 10 Combination 9 for both means and CVs for width and length variables, is there a way to do 10 Combination 8, down to 10C1 in the same SAS code?

data yourdata;          
   input Obs sample_index $ width length;           
   datalines;           
1   Hand1   112.92  48.4075
2   Hand1   132.315 69.9522
3   Hand1   233.248 39.021
4   Hand1   282.457 41.0748
5   Hand1   111.281 31.0925
6   Hand1   197.349 82.4425
7   Hand1   118.723 41.2737
8   Hand1   293.993 22.9405
9   Hand1   272.179 83.9418
10  Hand1   204.086 69.144
11  Hand2   132.35  83.6597
12  Hand2   113.897 46.8513
13  Hand2   185.176 94.1936
14  Hand2   135.936 56.0927
15  Hand2   265.141 64.873
16  Hand2   227.11  80.9043
17  Hand2   265.468 86.7117
18  Hand2   249.726 64.1256
19  Hand2   124.951 43.8425
20  Hand2   285.231 66.0117
;           
            
%let max_combinations = 9; /* Maximum number of values in a combination */          
%let total_obs_hand1 = 10; /* Total number of Hand1 observations in my dataset */           
%let total_obs_hand2 = 10; /* Total number of Hand2 observations in my dataset */           
            
array width_vals[&max_combinations] width1-width&max_combinations;          
array length_vals[&max_combinations] length1-length&max_combinations;           
            
/* Calculate all possible combinations of 9 values for Hand1 */         
proc sql;           
   create table combinations_hand1 as           
   select distinct a.width as width1, b.width as width2, c.width as width3,         
                     d.width as width4, e.width as width5, f.width as width6,           
                     g.width as width7, h.width as width8, i.width as width9,           
                     a.length as length1, b.length as length2, c.length as length3,         
                     d.length as length4, e.length as length5, f.length as length6,         
                     g.length as length7, h.length as length8, i.length as length9          
   from yourdata as a, yourdata as b, yourdata as c, yourdata as d, yourdata as e,          
        yourdata as f, yourdata as g, yourdata as h, yourdata as i          
   where a.sample_index = 'Hand1' and b.sample_index = 'Hand1'          
     and c.sample_index = 'Hand1' and d.sample_index = 'Hand1'          
     and e.sample_index = 'Hand1' and f.sample_index = 'Hand1'          
     and g.sample_index = 'Hand1' and h.sample_index = 'Hand1'          
     and i.sample_index = 'Hand1'           
     and a.Obs < b.Obs and b.Obs < c.Obs and c.Obs < d.Obs and d.Obs < e.Obs            
     and e.Obs < f.Obs and f.Obs < g.Obs and g.Obs < h.Obs and h.Obs < i.Obs            
   ;            
quit;           
            
/* Calculate all possible combinations of 9 values for Hand2 */         
proc sql;           
   create table combinations_hand2 as           
   select distinct a.width as width1, b.width as width2, c.width as width3,         
                     d.width as width4, e.width as width5, f.width as width6,           
                     g.width as width7, h.width as width8, i.width as width9,           
                     a.length as length1, b.length as length2, c.length as length3,         
                     d.length as length4, e.length as length5, f.length as length6,         
                     g.length as length7, h.length as length8, i.length as length9          
   from yourdata as a, yourdata as b, yourdata as c, yourdata as d, yourdata as e,          
        yourdata as f, yourdata as g, yourdata as h, yourdata as i          
   where a.sample_index = 'Hand2' and b.sample_index = 'Hand2'          
     and c.sample_index = 'Hand2' and d.sample_index = 'Hand2'          
     and e.sample_index = 'Hand2' and f.sample_index = 'Hand2'          
     and g.sample_index = 'Hand2' and h.sample_index = 'Hand2'          
     and i.sample_index = 'Hand2'           
     and a.Obs < b.Obs and b.Obs < c.Obs and c.Obs < d.Obs and d.Obs < e.Obs            
     and e.Obs < f.Obs and f.Obs < g.Obs and g.Obs < h.Obs and h.Obs < i.Obs            
   ;            
quit;           
            
/* Calculate the mean width and length for each combination for Hand1 */            
data means_hand1;           
   set combinations_hand1;          
   num_values = &max_combinations;          
   width_mean = mean(of width1-width&max_combinations);         
   length_mean = mean(of length1-length&max_combinations);          
run;            
            
/* Calculate the mean width and length for each combination for Hand2 */            
data means_hand2;           
   set combinations_hand2;          
   num_values = &max_combinations;          
   width_mean = mean(of width1-width&max_combinations);         
   length_mean = mean(of length1-length&max_combinations);          
run;            
            
/* Calculate the coefficient of variation (CV) for width and length for Hand1 */            
data cv_hand1;          
   set means_hand1;         
   cv_width = (std(of width1-width&max_combinations) / width_mean) * 100;           
   cv_length = (std(of length1-length&max_combinations) / length_mean) * 100;           
run;            
            
/* Calculate the coefficient of variation (CV) for width and length for Hand2 */            
data cv_hand2;          
   set means_hand2;         
   cv_width = (std(of width1-width&max_combinations) / width_mean) * 100;           
   cv_length = (std(of length1-length&max_combinations) / length_mean) * 100;           
run;            
            
/* Display the resulting means and CVs for Hand1 */         
proc print data=cv_hand1;           
run;            
            
/* Display the resulting means and CVs for Hand2 */         
proc print data=cv_hand2;           
run;            

I need to create adjust the SAS code to do all possible 10C9, 10C8, 10C7 down to 10C1 combinations for width and length, I would appreciate any insights.


Solution

  • You can use CALL LEXCOMB() to generate all combinations of K values from N values.

    You can use PROC MEANS (aka PROC SUMMARY) to calculate the MEAN and CV of a variable.

    So use a data step to generate the combinations into a dataset and then use PROC SUMMARY to find the CV.

    First let's make some data that has the observation number as a variable. And then use PROC TRANSPOSE to put those numbers into a list of variables.

    data have;
      row+1; 
      input id :$5. width length;
    datalines;           
    Hand1   112.92  48.4075
    Hand1   132.315 69.9522
    Hand1   233.248 39.021
    Hand1   282.457 41.0748
    Hand1   111.281 31.0925
    Hand1   197.349 82.4425
    Hand1   118.723 41.2737
    Hand1   293.993 22.9405
    Hand1   272.179 83.9418
    Hand1   204.086 69.144
    Hand2   132.35  83.6597
    Hand2   113.897 46.8513
    Hand2   185.176 94.1936
    Hand2   135.936 56.0927
    Hand2   265.141 64.873
    Hand2   227.11  80.9043
    Hand2   265.468 86.7117
    Hand2   249.726 64.1256
    Hand2   124.951 43.8425
    Hand2   285.231 66.0117
    ; 
    
    proc transpose data=have out=wide(drop=_name_) prefix=row;
      by id;
      var row;
    run;
    

    To code the CALL LEXICOMB() statement we need to know how many ROW variables to include. You could calculate the maximum size, but for this example we know that the largest group has 10 observations per BY group.

    %let n=10;
    

    So let's now use COMB() and CALL LEXICOMB() to generate all of the possible combinations of taking N-1 down to 2 observations at time.

    We can use the POINT= option on the SET statement to reset the array of observation numbers and also to pull in the individual observations values of WIDTH and LENGTH.

    data comb(keep=id n k i row:) tall(keep=id n k i width length);
      do group=1 to nobs;
       set wide nobs=nobs;
       n=n(of row:);
       do k=n-1 to 2 by -1;
         set wide(keep=row1-row&n) point=group ;
         do i=1 to comb(n,k);
           call lexcomb(i,k,of row1-row&n);
           output comb;
           array row row1-row&n;
           do j=1 to k;
             p=row[j];
             set have(keep=width length) point=p;
             output tall;
           end;
         end;
       end;
      end;
    run;
    

    Now let's calculate the MEAN and CV for the WIDTH and LENGTH of the various groups.

    proc summary data=tall ;
      by id n descending k i;
      var width length;
      output out=stats(drop=_type_) n= mean= cv= /autoname;
    run;
    

    And combine the results back with the dataset with the list of observations used. Let's blank out the observation numbers not used to make it clearer.

    data want;
      merge stats comb;
      by id descending k i ;
      array row row1-row&n;
      do j=k+1 to &n;
        row[j]=.;
      end;
      drop j;
    run;
    

    If we limit the groups to 4 and 3 observations each enter image description here

    then the results look like:

    Result enter image description here