FOCUS AREAS

Perl regular expressions

Base SAS

Example: Parsing output from DOS dir command

Perl Regular Expressions are supported beginning with SAS®9.


This example parses output from the DOS dir command. The example compiles five regular expressions, one for each format of line that the dir command may produce. The regular expressions need to be compiled only once with PRXPARSE, since they never change within the program.

When a line is read from the dir command, we test it against each of the regular expressions in turn to determine what type of line was output. Then, the PRXPOSN call routine is used to extract parts of the line for output.

/* Parse a DOS "dir" command.  Sample dir is below. */
/*
    Volume in drive \\ge\U5 is U5
    Volume Serial Number is 0502-2B65

    Directory of \\ge\U5\jason\SAS\playperl6

   11/16/2001  11:49 AM      <DIR>          .
   10/29/2001  09:20 AM      <DIR>          ..
   11/14/2001  11:29 AM             807,443 comp.mvs
   11/16/2001  09:09 AM      <DIR>          sasuser
   11/14/2001  03:01 PM      <DIR>          script
   11/15/2001  01:11 PM      <DIR>          src
   10/01/2001  10:04 AM               1,273 startup.cpr
   10/23/2001  01:13 PM      <DIR>          Backup
   11/16/2001  11:49 AM               1,030 prxfunc.lst
   11/14/2001  10:34 AM             112,377 prxfunc.bench.log
   11/15/2001  01:35 PM              27,087 prxpsubst.log
   11/14/2001  10:37 AM              33,828 prxptst.bench.log
   11/14/2001  10:57 AM              27,087 prxpsubst.bench.log
   11/14/2001  01:29 PM             129,352 comp.dntno
   11/15/2001  01:36 PM              33,828 prxptst.log
   11/16/2001  11:44 AM               5,676 prxtmp.log
   11/16/2001  11:49 AM             116,289 prxfunc.batch.log
                 17 File(s)      1,295,270 bytes
                             9,685,762,048 bytes free
*/

/* Read the output of the DOS dir command through an unnamed pipe.   */
/* The /4 switch asks dir to print four-digit years.                 */
filename dirpipe pipe 'dir /4';

/* Reusable regular expression fragments for the fields of a line:   */
/*   prxdate - date as mm/dd/yyyy                                    */
/*   prxtime - 12-hour time as hh:mm followed by AM or PM            */
/*   prxsize - byte count made of digits and thousands separators    */
%let prxdate=\d\d\/\d\d\/\d\d\d\d;
%let prxtime=\d\d:\d\d (?:AM|PM);
%let prxsize=[\d,]+;

/* Build data set DIRECTORY with one observation per file or         */
/* subdirectory listed by dir, and write a summary to the log once   */
/* the last line has been read.                                      */
data directory;
   if _N_ = 1 then do;
      /* Compile the five regular expressions once, one for each     */
      /* type of line we are interested in. The pattern ids are      */
      /* retained so they stay valid on every later iteration.       */
      retain re_file re_dir re_total re_free re_dirname;
      re_file = prxparse("/(&prxdate)\s+(&prxtime)\s+(&prxsize)\s+(\S.*)/");
      re_dir = prxparse("/(&prxdate)\s+(&prxtime)\s+<DIR>\s+(\S.*)/");
      re_total = prxparse("/\S+\sFile\(s\)\s+(&prxsize)\s+bytes/");
      re_free = prxparse("/(&prxsize)\s+bytes free/");
      re_dirname = prxparse("/Directory of (.+)/");
   end;

   format date MMDDYY. time HHMM.;
   length dirname name $ 128;
   keep dirname date time size name type;
   /* total, free and dirname come from header and footer lines and  */
   /* must survive until the summary is printed on the last line.    */
   retain total free dirname;

   infile dirpipe end=printinfo;
   input;

   if prxmatch(re_file, _infile_) then do;
      /* File line: date, time, size in bytes, file name */
      numfiles+1;
      type = 'FILE';
      call prxposn(re_file, 1, pos, len);
      date = input(substr(_infile_, pos, len), MMDDYY10.);
      call prxposn(re_file, 2, pos, len);
      /* The captured time is 8 characters long (hh:mm AM or PM).    */
      /* TIME8. reads the whole value; a width of 6 would cut off    */
      /* the AM or PM marker and store afternoon times as morning.   */
      time = input(substr(_infile_, pos, len), TIME8.);
      call prxposn(re_file, 3, pos, len);
      size = input(substr(_infile_, pos, len), COMMA32.);
      call prxposn(re_file, 4, pos, len);
      name = substr(_infile_, pos, len);
      output;
   end;
   else if prxmatch(re_dir, _infile_) then do;
      /* Subdirectory line: date, time, <DIR> marker, directory name */
      /* size is left missing for directories.                       */
      numdirs+1;
      type = 'DIR';
      call prxposn(re_dir, 1, pos, len);
      date = input(substr(_infile_, pos, len), MMDDYY10.);
      call prxposn(re_dir, 2, pos, len);
      /* Same 8-character time value as on file lines */
      time = input(substr(_infile_, pos, len), TIME8.);
      call prxposn(re_dir, 3, pos, len);
      name = substr(_infile_, pos, len);
      output;
   end;
   else if prxmatch(re_dirname, _infile_) then do;
      /* Header line naming the directory being listed */
      call prxposn(re_dirname, 1, pos, len);
      dirname = substr(_infile_, pos, len);
   end;
   else if prxmatch(re_total, _infile_) then do;
      /* Footer line with the total number of bytes used by files */
      call prxposn(re_total, 1, pos, len);
      total = input(substr(_infile_, pos, len), COMMA32.);
   end;
   else if prxmatch(re_free, _infile_) then do;
      /* Footer line with the number of bytes free on the volume */
      call prxposn(re_free, 1, pos, len);
      free = input(substr(_infile_, pos, len), COMMA32.);
   end;
   else do;
      /* ignore unrecognized lines */
   end;

   /* After the last line of dir output, write a summary to the log */
   if printinfo then do;
      put 'Directory name:  ' dirname;
      put 'Number of Files: ' numfiles;
      put 'Number of Dirs:  ' numdirs;
      put 'Bytes used:      ' total COMMA15. -L;
      put 'Bytes free:      ' free COMMA15. -L;
   end;
run;


Your Turn

The developers, testers and documentation folk that bring you Base SAS Software are very excited about the potential of these new capabilities of the SAS System. You can send electronic mail to Base.Research@sas.com with your comments.