| Perl regular expressions
|
Perl Regular Expressions are supported beginning with SAS®9.
This example parses output from the DOS dir command. The example compiles five regular expressions, one for each format of line that the dir command may produce. The regular expressions need to be compiled only once with PRXPARSE, since they never change within the program.
When a line is read from the dir command, we test it against each of the regular expressions in turn to determine what type of line was output. Then the PRXPOSN call routine is used to extract the parts of the line that are needed for output.
/* Parse a DOS "dir" command. Sample dir is below. */
/*
Volume in drive \\ge\U5 is U5
Volume Serial Number is 0502-2B65
Directory of \\ge\U5\jason\SAS\playperl6
11/16/2001 11:49 AM <DIR> .
10/29/2001 09:20 AM <DIR> ..
11/14/2001 11:29 AM 807,443 comp.mvs
11/16/2001 09:09 AM <DIR> sasuser
11/14/2001 03:01 PM <DIR> script
11/15/2001 01:11 PM <DIR> src
10/01/2001 10:04 AM 1,273 startup.cpr
10/23/2001 01:13 PM <DIR> Backup
11/16/2001 11:49 AM 1,030 prxfunc.lst
11/14/2001 10:34 AM 112,377 prxfunc.bench.log
11/15/2001 01:35 PM 27,087 prxpsubst.log
11/14/2001 10:37 AM 33,828 prxptst.bench.log
11/14/2001 10:57 AM 27,087 prxpsubst.bench.log
11/14/2001 01:29 PM 129,352 comp.dntno
11/15/2001 01:36 PM 33,828 prxptst.log
11/16/2001 11:44 AM 5,676 prxtmp.log
11/16/2001 11:49 AM 116,289 prxfunc.batch.log
17 File(s) 1,295,270 bytes
9,685,762,048 bytes free
*/
/* Regular-expression fragments that are spliced into the PRXPARSE patterns
   of the DATA step through macro-variable substitution. */
%let prxsize=[\d,]+;                 /* run of digits and commas */
%let prxtime=\d\d:\d\d (?:AM|PM);    /* two digits, colon, two digits, then AM or PM */
%let prxdate=\d\d\/\d\d\/\d\d\d\d;   /* two digits, slash, two digits, slash, four digits */

/* Unnamed pipe: reading the DIRPIPE fileref runs the dir command and
   returns its output. The /4 switch asks dir for four-digit years, which
   is the form the prxdate fragment matches. */
filename dirpipe pipe 'dir /4';
/* Build the DIRECTORY data set from the piped dir listing: one observation
   is written for every file line and every <DIR> line. The directory name
   and the two byte totals are picked up from their own lines, retained, and
   reported in the log once the last input line has been read. */
data directory;
   if _N_ = 1 then do;
      /* Compile five regexps, one for each type of line we are interested in.
         This runs on the first iteration only; RETAIN keeps the pattern ids
         available for all later iterations. */
      retain re_file re_dir re_total re_free re_dirname;
      re_file = prxparse("/(&prxdate)\s+(&prxtime)\s+(&prxsize)\s+(\S.*)/");
      re_dir = prxparse("/(&prxdate)\s+(&prxtime)\s+<DIR>\s+(\S.*)/");
      re_total = prxparse("/\S+\sFile\(s\)\s+(&prxsize)\s+bytes/");
      re_free = prxparse("/(&prxsize)\s+bytes free/");
      re_dirname = prxparse("/Directory of (.+)/");
   end;

   format date MMDDYY. time HHMM.;
   length dirname name $ 128;
   keep dirname date time size name type;
   /* These values come from single lines of the listing and must survive
      until the end-of-input report (dirname is also written to every row). */
   retain total free dirname;

   /* printinfo is set to 1 when the last line of the pipe has been read. */
   infile dirpipe end=printinfo;
   /* Load the line into _infile_ without assigning any variables. */
   input;

   /* File line: date, time, size, name. */
   if prxmatch(re_file, _infile_) then do;
      numfiles+1;
      type = 'FILE';
      call prxposn(re_file, 1, pos, len);
      date = input(substr(_infile_, pos, len), MMDDYY10.);
      call prxposn(re_file, 2, pos, len);
      /* The captured text is eight characters long (hh:mm plus AM or PM).
         TIME8. reads all of it so the AM/PM marker is honored; a width of 6
         stops before the marker and stores afternoon times 12 hours early. */
      time = input(substr(_infile_, pos, len), TIME8.);
      call prxposn(re_file, 3, pos, len);
      size = input(substr(_infile_, pos, len), COMMA32.);
      call prxposn(re_file, 4, pos, len);
      name = substr(_infile_, pos, len);
      output;
   end;
   /* Subdirectory line: date, time, <DIR>, name. No size is present, so
      size stays missing on these observations. */
   else if prxmatch(re_dir, _infile_) then do;
      numdirs+1;
      type = 'DIR';
      call prxposn(re_dir, 1, pos, len);
      date = input(substr(_infile_, pos, len), MMDDYY10.);
      call prxposn(re_dir, 2, pos, len);
      /* Same eight-character time text as on file lines, so TIME8. here too. */
      time = input(substr(_infile_, pos, len), TIME8.);
      call prxposn(re_dir, 3, pos, len);
      name = substr(_infile_, pos, len);
      output;
   end;
   /* Header line naming the directory being listed. */
   else if prxmatch(re_dirname, _infile_) then do;
      call prxposn(re_dirname, 1, pos, len);
      dirname = substr(_infile_, pos, len);
   end;
   /* Summary line with the total bytes used by the listed files. */
   else if prxmatch(re_total, _infile_) then do;
      call prxposn(re_total, 1, pos, len);
      total = input(substr(_infile_, pos, len), COMMA32.);
   end;
   /* Summary line with the free bytes remaining. */
   else if prxmatch(re_free, _infile_) then do;
      call prxposn(re_free, 1, pos, len);
      free = input(substr(_infile_, pos, len), COMMA32.);
   end;
   else do;
      /* ignore unrecognized lines */
   end;

   /* After the final input line, write the collected summary to the log. */
   if printinfo then do;
      put 'Directory name: ' dirname;
      put 'Number of Files: ' numfiles;
      put 'Number of Dirs: ' numdirs;
      put 'Bytes used: ' total COMMA15. -L;
      put 'Bytes free: ' free COMMA15. -L;
   end;
run;