Search code examples
regex sas sas-ds2

Using PCRXFIND with DS2


I want to replicate this using PCRXFIND instead of prxmatch for speed and because prxmatch is not working correctly.

I am trying to find text in a data file. Here is a reproducible example. There is one file with the data to search and another with the search terms. I want to use regular expressions. Normally this is done with prxmatch. I want to do it with PCRXFIND because I am working inside DS2. I use a hash iterator to iterate through the search terms for each observation in the data set.

The search results do not follow any pattern I can recognize. I would be happy for any suggestions. I posted this on SAS Communities with no results.

    * ds2 PCRXFIND example;

* Sample records to be searched, one row per person with a name and a department;
data person;
    * Both columns are character with the 8-byte length that list input assigns by default;
    length name dept $ 8;
    input name dept;
    datalines;
John Sales
Mary Acctng
Joe Findme
Sue Hereiam
;
run;


* Search terms, one per row, where s_index is a character key and term is the text to look for;
data searchterms;
    * MISSOVER leaves term missing instead of reading the next line when a row is short;
    infile datalines missover;
    length s_index term $ 8;
    input s_index term;
datalines;
1   Hereiam
2   Findme
3   Acc
;
run;

* Show the column metadata and then the rows of the search-term table;
proc contents data=searchterms;
run;

proc print data=searchterms;
run;

proc ds2;
    /* For every row of person, test each search term against "name   dept"
       (case-insensitive) and write one output row per person/term pair.
       c holds the prxmatch result; search_term and search_text record the
       pattern and the text that were compared. */
    data search_results (overwrite=yes);

    dcl double rc c ;
    declare char(8) s_index;
    declare char(8) term;
    declare char(11) name dept;
    declare char(1) c_options;
    declare char(20) search_term;
    /* name and dept are char(11), so name||'   '||dept needs 11+3+11 = 25 bytes;
       char(20) cut off the end of longer dept values in this column. */
    declare char(25) search_text;
    dcl package hash h(1, '{select s_index, term from searchterms}');
    dcl package hiter hi('h');
        method init();
            c_options = 'i';
            rc = h.defineKey('s_index');
            /* The key is also stored as data so the iterator fills s_index
               for each term; as a key only it stayed blank in the output. */
            rc = h.defineData('s_index');
            rc = h.defineData('term');
            rc = h.defineDone();
        end;
        method run();
            /* Local return code for the iterator; the global rc keeps the
               value assigned in init(). */
            dcl double rc;
            set {select  name, dept from person};
            rc = hi.first();
            do while(rc=0);
                c = prxmatch('/'||compress(term)||'/i',name||'   '||dept);
                search_term = '/'||compress(term)||'/i';
                search_text = name||'   '||dept;
                /* Write the row BEFORE advancing the iterator. Calling
                   hi.next() first replaced term with the following entry,
                   so each row showed one term next to the match result of
                   another. */
                output;
                rc = hi.next();

            end;
        end;
    enddata;
run;
quit;

Solution

  • In SAS 9.4M5 a new package for regular expression matching was added: the DS2 PCRXFIND package. Below is an example that uses that package together with a custom linked-list package to complete your task.

    /* Store the number of rows in searchterms in the macro variable termdim.
       The init() method of the results data program further down uses
       &termdim as the upper bound of its loading loop, and nothing else in
       this program defines it. */
    proc sql noprint;
      select count(*) into :termdim trimmed from searchterms;
    quit;

    proc ds2;
    /* One node of a singly linked list: an integer id, a pcrxfind instance,
       and a link to the following node. */
    package prxElement/overwrite=yes;
      declare int                id;    /* identifier passed to the constructor */
      declare package pcrxfind   prx;   /* pcrxfind instance built from the constructor's regex */
      declare package prxElement next;  /* following node; the constructor does not assign it */

      /* Constructor: store the id and create the pcrxfind instance for the
         given regular expression. */
      method prxElement( int                               elementId,
                         varchar(32767) character set utf8 pattern);
        id  = elementId;
        prx = _new_ pcrxfind(pattern);
      end;
    endpackage;
    run;
    
    /* Singly linked list of prxElement nodes. New nodes are appended after
       the current last node, and count tracks how many have been added. */
    package prxList/overwrite=yes;
      declare package prxElement head;   /* first node; null means the list is empty */
      declare int count;                 /* number of nodes added through add() */

      /* Constructor: start with an empty list. */
      method prxList();
        count = 0;
        head  = null;
      end;

      /* Return the final node by following next links from head.
         It reads head.next without checking head, so add() only calls it
         once the list has at least one node. */
      method last() returns package prxElement;
        declare package prxElement cursor;
        cursor = head;
        do while ( ^null(cursor.next) );
          cursor = cursor.next;
        end;
        return cursor;
      end;

      /* Append element: it becomes head when the list is empty, otherwise it
         is linked after the current last node. */
      method add(package prxElement element);
        declare package prxElement tailNode;
        if ( null(head) ) then do;
          head  = element;
          count = 1;
        end;
        else do;
          tailNode = last();
          tailNode.next = element;
          count = count + 1;
        end;
      end;
    endpackage;
    run;
    
    /* Writes one row per person/search-term pair. rc is 1 when the term's
       pattern matches "name dept" and 0 otherwise; s_index identifies the
       term. */
    data results(overwrite=yes);
    declare double rc;
    declare package pcrxfind   prx;
    declare package prxElement item;
    declare package prxList    prxList;
    drop term;

    /* Read the search terms once and build a list holding one case-insensitive
       matcher per term. &termdim is the loop bound and must resolve to the
       number of rows in searchterms before this step is compiled. */
    method init();
      declare int termNo;
      prxList = _new_ prxList();
      do termNo = 1 to &termdim;
        set searchterms;
        item = _new_ prxElement(s_index, cats('/', term, '/i'));
        prxList.add(item);
      end;
    end;

    /* For each person row, walk the list from head to the final node and
       output one row per term. */
    method run();
      set person;
      item = prxList.head;
      do while ( ^null(item) );
        prx     = item.prx;
        s_index = item.id;
        /* Turn the value returned by Match into a 0/1 flag. */
        rc = (prx.Match(catx(' ', name, dept))) > 0;
        output;
        item = item.next;
      end;
    end;
    enddata;
    run;
    quit;