Search code examples
sas

Conditional replacement of labels based on another data set


suppose to have the following data set:

 ID          Label      
0001        0001_1   
0001        0001_1   
0001        0001_1   
0001        0001_1   
0001        0001_1   
0001        0001_1   
0002        0002_1   
0002        0002_1  
0002        0002_2
0002        0002_2
0002        0002_3
0002        0002_3   

and another one:

 ID          Label      
0001        0001_1   
0001        0001_1   
0001        0001_2   
0001        0001_2   
0001        0001_3   
0001        0001_3   
0002        0002_1   
0002        0002_1  
0002        0002_2
0002        0002_2
0002        0002_3
0002        0002_3  

You want the following: if in the first dataset there is only one type of Label (i.e., 0001_1), the second dataset should have that type. Otherwise if there are multiple labels nothing must be done. The desired output should be:

 ID          Label      
0001        0001_1   
0001        0001_1   
0001        0001_1   
0001        0001_1   
0001        0001_1   
0001        0001_1  
0002        0002_1   
0002        0002_1  
0002        0002_2
0002        0002_2
0002        0002_3
0002        0002_3   

Thank you in advance

Best


Solution

  • You will want to compute the groups in the first table that have a single label in aggregate and apply that label to the groups in the second table.

    Example:

    Computation with PROC FREQ and application via MERGE.

    data have1;
      call streaminit(20231);
    
      do id = 1 to 10;
        do seq = 1 to rand('integer', 10) + 2;
          if mod(id,2) = 0 
            then label = 'AAA';
            else label = repeat(byte(64+rand('integer', 26)),2);
          output;
        end;
      end;
    run;
    
    data have2;
      call streaminit(20232);
    
      do id = 1 to 10;
        do seq = 1 to rand('integer', 12) + 2;
          label = repeat(byte(64+rand('integer', 26)),2);
          output;
        end;
      end;
    run;
    
    proc freq noprint data=have1;
      by id;
      table label / out=one_label(where=(percent=100));
    run;
    
    data want2;
      merge 
        have2
        one_label(keep=id label rename=(label=have1label) in=reassign)
      ;
      by id;
      if reassign then label = have1label;
      drop have1label;
    run;
    

    Same result achieved with SQL code, performing computation in a sub-select and using COALESCE for application.

    proc sql;
      create table want2 as 
      select 
        have2.id
      , coalesce(singular.onelabel, have2.label) as label
      from 
        have2
      left join 
        ( select unique id, label as onelabel 
          from have1 
          group by id 
          having count(distinct label) = 1
        ) as singular
      on 
        have2.id = singular.id
      ;