Search code examples
sas rename transpose sas-macro

SAS code to transpose and rename variables in a specific column before importing


I am using the macro provided below to import several CSV files, and it works great. However, I am struggling to transpose columns 3 and 6 in my dataset from column headers to rows before importing. I would then like to rename these columns date and study, respectively.

(Right now, for example, each file has a different date as the name of column 3. I would like to bring that value down into the rows and give the column a new name, i.e. date, so that when I stack all the imported files one below the other, every value ends up in the column called date.)

I do not know in advance what column 3 will be named in each of my files. For example, it could be named XYZ in file 1 and YYX in file 2; I want to name both columns date and also move XYZ and YYX into the rows instead. Examples are below:

In File 1:

| Pt | Rt | XYZ |
|----|----|------|
| 1  | 2  |       |

In File 2:

| Pt | Rt | YYX |
|----|----|------|
| 3  | 4  |       |

The result should look like:

| Pt | Rt | date |
|----|----|-------|
| 1  | 2  | XYZ |
| 3  | 4  | YYX |

Code:

%*Creates a list of all files in the DIR directory with the specified extension (EXT);

/*
 * list_files: append one row per matching file to the data set LIST.
 *
 * Parameters
 *   dir  Directory to scan. Entries whose names contain no dot are treated
 *        as sub-directories and scanned recursively.
 *   ext  File extension to match (compared case-insensitively against the
 *        text after the last dot of each entry name).
 *
 * Output
 *   One observation per matching file is appended to LIST with the columns
 *   DIR, NAME, PATH (dir\name) and THE_NAME (text before the first dot).
 *   LIST is only ever appended to here, never cleared.
 */
%macro list_files(dir,ext);

      %local filrf rc did name i file_name;

      /* FILRF is empty, so FILENAME() generates a fileref and stores it in FILRF. */
      %let rc=%sysfunc(filename(filrf,&dir));

      %let did=%sysfunc(dopen(&filrf));

      %if &did eq 0 %then
            %do;

                  %put Directory &dir cannot be open or does not exist;

                  /* Release the fileref assigned above before leaving the macro. */
                  %let rc=%sysfunc(filename(filrf));

                  %return;

            %end;

      %do i = 1 %to %sysfunc(dnum(&did));

            /* Quoted read so special characters in the member name stay inert. */
            %let name=%qsysfunc(dread(&did,&i));

            %if %qupcase(%qscan(&name,-1,.)) = %upcase(&ext) %then
                  %do;

                        %put &dir\&name;

                        %let file_name =  %qscan(&name,1,.);

                        %put &file_name;

                        data _tmp;

                              /* PATH is sized for DIR + separator + NAME. Left undeclared it */
                              /* would default to 200 bytes and CATX would blank long paths.  */
                              length dir $512 name $100 path $613;

                              dir=symget("dir");

                              name=symget("name");

                              path = catx('\',dir,name);

                              the_name = substr(name,1,find(name,'.')-1);

                        run;

                        /* FORCE lets the append proceed even if attributes differ from LIST. */
                        proc append base=list data=_tmp force;

                        run;

                        proc sql;

                              drop table _tmp;

                        quit;

                  %end;

            /* No second dot-delimited token means no extension: assume a sub-directory. */
            %else %if %qscan(&name,2,.) = %then
                  %do;

                        %list_files(&dir\&name,&ext)

                  %end;

      %end;

      /* Close the directory and clear the generated fileref. */
      %let rc=%sysfunc(dclose(&did));

      %let rc=%sysfunc(filename(filrf));

%mend list_files;



%*Macro to import a single file. The path, file name and output dataset name must all be specified;

%macro import_file(path, file_name, dataset_name );

      /* Read the CSV file &path.\&file_name. with PROC IMPORT and write it to */
      /* &dataset_name, replacing that data set if it already exists.          */
      proc import out=&dataset_name
                  datafile="&path.\&file_name."
                  dbms=csv
                  replace;
      run;

%mend import_file;



/* Start from an empty list: list_files only ever appends to LIST, so a re-run  */
/* in the same session would otherwise duplicate every entry (and every import). */
/* Assumes one-level data set names resolve to WORK.                             */
proc datasets library=work nolist nowarn;
      delete list;
quit;

/* Create the list of files, in this case all CSV files. */
%list_files(C:\Users\baidw002\Documents\1 BCH-LJAF\Real data transfer (BCH to UAB)\CGM\cgmtestfiles\machine\csv, csv);

/* Call import_file once for each entry in LIST. The output data sets are named */
/* test01, test02, ... in list order.                                           */
data _null_;

      set list;

      /* DIR is $512 and NAME is $100. Without an explicit length STRING would  */
      /* default to 200 bytes and CATT would return blank for long paths.       */
      length string $700;

      /* %nrstr defers the macro call until the generated code actually runs,   */
      /* instead of executing it while this DATA step is still running.         */
      string = catt('%nrstr(%import_file(', dir, ', ',  name,', ', catt('test', put(_n_, z2.)), '));');

      call execute (string);

run;

Code originally from: https://github.com/statgeek/SAS-Tutorials/blob/master/Import_all_files_one_type

Edit1: As suggested in the first comment.

Edit2: added link to the source of the code.


Solution

  • Why not just do the whole thing with one data step? SAS can read CSV files directly without using PROC IMPORT.

    Use a wildcard in the filename to read all of the files at once. When you are on the first line of a file, read the DATE from the header row.

    %let path=C:\Users\baidw002\Documents\1 BCH-LJAF\Real data transfer (BCH to UAB)\CGM\cgmtestfiles\machine\csv;
    data want;
      /* Declare the output columns first so they are stored as pt, rt, date. */
      attrib pt   length=8
             rt   length=8
             date length=8 informat=anydtdte. format=yymmdd10.;
      length dummy $1 fname $256;
      /* DATE is read once per file and carried onto every data row of it. */
      retain date;
      drop dummy;

      /* One INFILE for every CSV in the folder. FNAME holds the current file name. */
      infile "&path/*.csv" filename=fname truncover dsd;

      /* Load the next line and hold it so FNAME is known before any field is read. */
      input @;

      /* A change of file name means the held line is a header row: skip its first */
      /* two fields and read the third as the date. The next INPUT then moves on   */
      /* to the following line.                                                    */
      if lag(fname) ne fname then input 2*dummy date;

      input pt rt;
    run;
    

    So if I make up some dummy files:

    %let path=%sysfunc(pathname(work));
    data _null_;
      /* First sample file: a header whose third column name is a date, then one data row. */
      file "&path/test1.csv";
      put 'pt,rt,3/4/19';
      put '1,2';

      /* Second sample file, same layout with a different date in the header. */
      file "&path/test2.csv";
      put 'pt,rt,4/5/19';
      put '3,4';
    run;
    

    I get this result:

    Obs    pt    rt          date
    
     1      1     2    2019-03-04
     2      3     4    2019-04-05