filename saslibx "SASCTP.V5.FMTEST" lrecl=32760 blksize=32760; /*-----------------------------------------------------------------*/ /* This macro will read an entire load module PDS, extracting out */ /* all the old-style formats (ones generated by PROC FORMAT). */ /* The members of the PDS must have an SSI starting with AE to be */ /* a format. It is possible that formats in assembler, not created */ /* by PROC FORMAT, will be present, but those cannot be processed. */ /* */ /* %convert_fmtlib(v5fmtlib=SASLIBX, cntlin=CNTLIN, fmtlib=work, */ /* fmtcat=FORMATS */ /* */ /* where */ /* */ /* cntlin the name of the cntlin data set to create */ /* ddname the DDname for the load module PDS */ /* */ /* Once the cntlin data set is created, it can be run through PROC */ /* FORMAT using the CNTLIN= option. */ /* */ /* Note that this macro is effectively equivalent to the V5TOV6 */ /* migration procedure that was available in SAS Version 6, */ /* although the only functionality provided by this macro is for */ /* conversion of formats. (V5TOV6 converted other entities as */ /* well). */ /* */ /* This macro can be run using Version 8 of the SAS System or */ /* higher. */ /*-----------------------------------------------------------------*/ %macro convert_fmtlib(v5fmtlib=SASLIBX,cntlin=CNTLIN,fmtlib=WORK, fmtcat=FORMATS); %put %str(************************************************************); %put %str(* Converting old V5 format library allocated to DDNAME: ); %put %str(* &V5FMTLIB ); %put %str(* ); %put %str(* These converted formats will be written to: ); %put %str(* &FMTLIB..&FMTCAT ); %put %str(************************************************************); %let v5fmtlibrc=%sysfunc(fexist(&v5fmtlib)); %if &v5fmtlibrc eq %str(0) %then %do; %put ***************************************************************; %put ERROR: You must pre-allocate the V5 format library.; %put NOTE: You can do this with a FILENAME statement or externally.; %put NOTE: Default fileref/DDname is SASLIBX.; %put NOTE: To tell this routine the fileref use V5FMTLIB= keyword.; %put ***************************************************************; %put; %put; %put ***************************************************************; %put Syntax to invoke this routine is:; %put; %put CONVERT_FMTLIB(V5FMTLIB=saslibx,FMTLIB=work,FMTCAT=library); %put; %put V5FMTLIB= (required) fileref/DDname of V5 format library.; %put Must be preallocated before invoking this macro; %put Default value: SASLIBX; %put FMTLIB= (required) libref/DDname of SAS library for formats.; %put Must be preallocated before invoking this macro; %put Default value: WORK; %put FMTCAT= (optional) format catalog name to store formats; %put Default value: FORMATS; %put; %put ***************************************************************; %end; %else %do; /*-----------------------------------------------------------------*/ /* First create a template cntlin data set that will contain all */ /* the necessary variables with the desired lengths. The start */ /* and end values have a max of 16 in old-style formats, and the */ /* labels have a max of 40. Format names have a max of 8. This */ /* data set will have no observations, but will be used as the */ /* BASE= data set for PROC APPEND, so we can be sure that the */ /* proper lengths are maintained. */ /*-----------------------------------------------------------------*/ data &cntlin; length fmtname $8 type $1 start end $16 label $40 hlo $8 sexcl eexcl $1 mult 8 prefix $2 fill $1 min max default noedit 4; retain fmtname ' ' type ' ' start ' ' end ' ' label ' ' hlo ' ' sexcl ' ' eexcl ' ' prefix ' ' fill ' ' mult . min . max . default . noedit .; delete; run; /*-----------------------------------------------------------------*/ /* The FILEEXT=ASIS option needs to be specified here. This option */ /* allows for memnames that have atypical characters to be */ /* processed properly. The character of concern is an underscore, */ /* which is valid in a format name. Without this option, the name */ /* would be translated to use # when accessed in the DATA step, */ /* and this would fail since the memname with a # would not be */ /* found. */ /*-----------------------------------------------------------------*/ options fileext=asis; /*-----------------------------------------------------------------*/ /* All the members are read from the directory blocks of the PDS. */ /* Using LRECL=256 BLKSIZE=256 allows the directory to be read */ /* instead. We are only interested in the member name and the SSI */ /* field, and we are only interested in those members with an SSI */ /* starting with AE, which indicates a format. */ /*-----------------------------------------------------------------*/ %let found_underscore=0; data memlist; infile &v5fmtlib recfm=u lrecl=256 blksize=256; retain mwritten 0; keep memname ssi; input @3 @; do i=1 to 6; input memname $char8. +26 ssi $char4. +2 @; if substr(ssi,1,1) NE 'AE'x then continue; if memname='ffffffffffffffff'x then stop; j = index(memname,'_'); if j then do; ***** memname=translate(memname,'#','_'); if NOT mwritten then call symput('found_underscore','1'); mwritten=1; end; output; end; run; /*-----------------------------------------------------------------*/ /* The following code is no longer used, but left in, in case it */ /* may later be needed. Without the FILEEXT=ASIS option, the PDS */ /* needs to be copied to a temporary location where the memnames */ /* containing underscores can be changed to use # instead. This */ /* code uses PDSCOPY to copy the PDS to TEMPFIL1, using the */ /* OUTTAPE option so it's a sequential stream. TEMPFIL1 is copied */ /* to TEMPFIL2, but with all the records referencing the members */ /* being updated to change _ to #. Then PDSCOPY recreates the */ /* PDS in TEMPPDS, and that PDS is then subsequently used. */ /*-----------------------------------------------------------------*/ /* %if &found_underscore %then %do; filename tempfil1 temp; filename tempfil2 temp; filename temppds temp dsorg=po; proc pdscopy dc indd=&v5fmtlib. outdd=tempfil1 outtape; run; data _null_; infile tempfil1; file tempfil2 dcb=tempfil1; input @; c=substr(_infile_,1,1); length memname $8; if c='H' then do; memname=substr(_infile_,11,8); i=index(memname,'_'); if i then substr(_infile_,11+i-1,1)='#'; end; else if c='D' then do; memname=substr(_infile_,19,8); i=index(memname,'_'); if i then substr(_infile_,19+i-1,1)='#'; end; put _infile_; run; proc pdscopy indd=tempfil2 outdd=temppds intape; run; filename tempfil1 clear; filename tempfil2 clear; %let v5fmtlib=TEMPPDS; %end; */ /*-----------------------------------------------------------------*/ /* Here the list of members is run through, and the %readmem */ /* macro is invoked for each member via CALL EXECUTE. */ /*-----------------------------------------------------------------*/ data _null_; set memlist; length command $200; command='%readmem(' ||"&cntlin.," ||"&v5fmtlib.," || memname||',' ||put(ssi,$hex8.) ||');'; call execute(trim(command)); run; /*-----------------------------------------------------------------*/ /* Clean up after ourselves. */ /*-----------------------------------------------------------------*/ proc delete data=memlist; run; /* %if &found_underscore %then %do; filename temppds clear; %end; */ %end; %mend convert_fmtlib; /*-----------------------------------------------------------------*/ /* This macro will read a member of a load module PDS, expecting */ /* it to be an old-style format (one generated by PROC FORMAT). */ /* It will extract the ranges and labels from this load module */ /* member, and append to a CNTLIN data set the range/label pairs */ /* defined in the format. */ /* */ /* %readmem(cntlin,ddname,memname,ssi); */ /* */ /* where */ /* */ /* cntlin the name of the cntlin data set to append to */ /* ddname the DDname for the load module PDS */ /* memname the member name to be processed */ /* ssi the SSI in hex (should start with AE) */ /*-----------------------------------------------------------------*/ %macro readmem(cntlin,v5fmtlib,memname,ssi); data header(keep=fmtname type min max default) ranges(keep=start end labelnum hlo sexcl eexcl ) labels(keep=labelnum label mult prefix noedit fill); infile &v5fmtlib(&memname) column=c length=l; length fmtname $8 type $1 start end $16 label $40 hlo $8 sexcl eexcl $1 mult 8 prefix $2 fill $1 min max default noedit 4; length buffer $200; /*-------------------------------------------------------------*/ /* The memname starts with @ if an informat is defined. The */ /* memname has a $ if it is a character format. The TYPE */ /* variable should be set as: N=numeric C=character */ /* I=numeric informat J=character informat. Note that TYPE */ /* should be set to P for PICTURE formats, but we won't know */ /* for sure that we have a PICTURE format until later. */ /*-------------------------------------------------------------*/ fmtname=translate("&memname.",'_','#'); if fmtname=:'@' then do; fmtname=substr(fmtname,2); if fmtname=:'$' then type='J'; else type='I'; end; else do; if fmtname=:'$' then type='C'; else type='N'; end; /*-------------------------------------------------------------*/ /* SSI: AElhhdrw l=min hh=max w*16+d=default */ /* r=0 left just, 8=right just */ /*-------------------------------------------------------------*/ min=input(substr("&ssi.",3,1),hex1.); max=input(substr("&ssi.",4,2),hex2.); default=input(substr("&ssi.",8,1),hex2.)*16+ input(substr("&ssi.",6,1),hex1.); /*-------------------------------------------------------------*/ /* All formats have a LH R8,Rnnn instruction, and the nnn is */ /* the offset into the module where the ranges begin. The */ /* binary for LH R8 is 4880. We search for this to begin the */ /* process. Note that if this isn't found, we end up at EOF */ /* and the header/ranges/labels data sets have no observations.*/ /* The offset needs to have the register nibble cleared out. */ /*-------------------------------------------------------------*/ input @'4880'x @; input offset pib2. @; offset=band(offset,0fffx); /*-------------------------------------------------------------*/ /* In V5, the type character N|C|P was added at offset 20, */ /* for Numeric|Character|Picture. */ /* If it's there and it's a P, we know we have a PICTURE */ /* format. */ /*-------------------------------------------------------------*/ input @20 ncp $char1. @; picfmt=(ncp='P'); /*-------------------------------------------------------------*/ /* We head to the offset and get the range count, the length */ /* of the labels, and the bits byte. We can determine OTHER, */ /* UPCASE, JUST, and whether it is a 76.5 or 76.6 format. If */ /* a numeric format, the we read the FUZZ value. */ /*-------------------------------------------------------------*/ input @1 +offset fmt_count pib2. bits pib1. fmt_length pib1. @; other=^^band(bits,080x); upcase=^^band(bits,010x); just=^^band(bits,040x); fmt765 = (offset = 292); fmt766 = (offset = 312); charfmt=(type^='N'); if ^charfmt then input fuzz rb4. @; if upcase then hlo='U'; if just then hlo=trim(hlo)||'J'; /*-------------------------------------------------------------*/ /* If an OTHER= clause was given, then we emit a range for */ /* that. It will always be the first one with LABELNUM=1. */ /* HLO is set for this observation only to have O in it for */ /* OTHER. START and END are set to **OTHER**. */ /*-------------------------------------------------------------*/ if other then do; start='**OTHER**'; end=start; labelnum=1; hlo=trim(hlo)||'O'; output ranges; substr(hlo,length(hlo),1)=' '; end; /*-------------------------------------------------------------*/ /* Note that all subsequent access of data from the load */ /* module is done via the readbytes link routine. This is */ /* because our data can span multiple records, and readbytes */ /* ensures an intact buffer to process. */ /*-------------------------------------------------------------*/ /*-------------------------------------------------------------*/ /* Read through all the ranges and output to the ranges data */ /* set. */ /*-------------------------------------------------------------*/ nlabels=0; do j=1 to fmt_count; /*----------------------------------------------------------*/ /* The old 76.5 numeric formats have 2 8-byte doubles */ /* followed by a long integer subscript. */ /*----------------------------------------------------------*/ if fmt765 then do; len=20; link readbytes; start=substr(buffer,1,8); end=substr(buffer,9,8); sub=input(substr(buffer,17,4),pib4.); x='00'x; end; /*----------------------------------------------------------*/ /* Character formats have 16-byte strings for start and */ /* end and a short subscript. If the string is all '00'x */ /* then it is LOW, if all 'FF'x it is HIGH. */ /*----------------------------------------------------------*/ else if charfmt then do; len=36; link readbytes; start=substr(buffer,1,16); end=substr(buffer,17,16); x=substr(buffer,33,1); sub=input(substr(buffer,35,2),pib2.); low=(verify(substr(start,1,16),'00'x)=0); high=(verify(substr(end,1,16),'FF'x)=0); end; /*----------------------------------------------------------*/ /* Numeric formats have 2 8-byte doubles and a long integer */ /* subscript. */ /*----------------------------------------------------------*/ else do; len=24; link readbytes; start=substr(buffer,1,8); end=substr(buffer,9,8); sub=input(substr(buffer,17,4),pib4.); x=substr(buffer,18,1); end; /*----------------------------------------------------------*/ /* Compute the label number and keep up with the highest */ /* number, since that is the only way we know the number of */ /* labels. */ /*----------------------------------------------------------*/ labelnum = sub - ^other; nlabels=max(nlabels,labelnum); sexcl=' '; eexcl=' '; /*----------------------------------------------------------*/ /* For numeric formats, a missing value can be represented */ /* by 'FF'x in the first byte, and the missing value */ /* character in the last byte. We adjust it back. If the */ /* value is '8000000000000001'x then this indicates an */ /* exclusion, so we set SEXCL or EEXCL to Y accordingly. */ /* START/END are then informatted using RB8. to the number */ /* wanted, which is the formatted out using BEST16. for */ /* the CNTLIN data set. */ /*----------------------------------------------------------*/ if ^charfmt then do; if substr(start,1,1)='FF'x then substr(start,1,1)=substr(start,2,1); if substr(start,1,8)='8000000000000001'x then do; start='0000000000000000'x; sexcl='Y'; end; start=left(put(input(substr(buffer,1,8),rb8.),best16.)); if substr(end,1,8)='8000000000000001'x then do; end='0000000000000000'x; eexcl='Y'; end; else if substr(end,1,1)='FF'x then substr(end,1,1)=substr(end,2,1); end= left(put(input(substr(buffer,9,8),rb8.),best16.)); low=(substr(start,1,8)='FEFFFFFFFFFFFFFF'x); high=(substr(end,1,8)='7FFFFFFFFFFFFFFF'x); end; /*----------------------------------------------------------*/ /* If SEXCL and EEXCL aren't yet set, the exclusion bits */ /* tell us if they need to be set. */ /*----------------------------------------------------------*/ if sexcl=' ' then do; if band(input(x,pib1.),80x) then sexcl='Y'; else sexcl='N'; end; if eexcl=' ' then do; if band(input(x,pib1.),40x) then eexcl='Y'; else eexcl='N'; end; /*----------------------------------------------------------*/ /* Set L and H for low and high in HLO if LOW or HIGH */ /* values were specified. */ /*----------------------------------------------------------*/ if low then do; start='**LOW**'; hlo=trim(hlo)||'L'; end; if high then do; end='**HIGH**'; hlo=trim(hlo)||'H'; end; /*----------------------------------------------------------*/ /* Output our range, and reset HLO so that L and H are */ /* no longer there. */ /*----------------------------------------------------------*/ output ranges; hlo=compress(hlo,'LH'); end; /*-------------------------------------------------------------*/ /* At this point all the ranges have been read and we are at */ /* the label section. There is a dummy label inserted if there */ /* no OTHER= clause. It is skipped here. Then we loop through */ /* and read the labels. Note that the label count has been */ /* determined to be nlabels, but not if we had OTHER= only, so */ /* we ensure nlabels is set accordingly. */ /*-------------------------------------------------------------*/ if ^other then do; len=fmt_length; link readbytes; end; else nlabels=max(nlabels,other); do labelnum=1 to nlabels; /*----------------------------------------------------------*/ /* Read our buffer containing the label. */ /*----------------------------------------------------------*/ len=fmt_length; link readbytes; /*----------------------------------------------------------*/ /* If it's the first label, and it could be PICTURE but we */ /* haven't determined for sure, we can check now and see */ /* if the first 2 bytes of the label are a short integer */ /* between 1 and 24. If so, we assume we have a PICTURE */ /* format. Note that this would apply to PICTURE formats */ /* in 76.6, SAS79, and SAS82. The length is always 40 with */ /* these PICTURE formats. In V5, PICTURE was specified in */ /* the header so this check is not necessary. */ /*----------------------------------------------------------*/ if labelnum=1 and ^fmt765 and ^charfmt and fmt_length=40 and ^picfmt then do; picfmt = 1<=input(substr(buffer,1,2),ib2.)<=24; end; /*----------------------------------------------------------*/ /* For PICTURE formats, we extract the various fields. The */ /* digit selectors are '20'X for 0 and '21'X for 1. */ /*----------------------------------------------------------*/ if picfmt then do; mult=input(substr(buffer,5,8),rb8.); prefix=substr(buffer,13,2); noedit=(substr(buffer,15,1)='01'x); fill=substr(buffer,16,1); label=substr(buffer,17,24); if ^noedit then label=translate(label,'09','2021'x); end; /*----------------------------------------------------------*/ /* The label is copied as is for non-PICTURE labels. */ /*----------------------------------------------------------*/ else do; label=substr(buffer,1,fmt_length); end; output labels; end; /*-------------------------------------------------------------*/ /* The single header observation can be emitted now that we */ /* know for sure if we have a PICTURE format. */ /*-------------------------------------------------------------*/ if picfmt then type='P'; output header; stop; return; /*-----------------------------------------------------------------*/ /* This link routine will read bytes from the load module. len will*/ /* be the number of bytes wanted. l is the length from the LENGTH= */ /* option and c is the column from the COLUMN= option. We simply */ /* read bytes into the buffer if we have them all. Otherwise we */ /* read a byte at a time until we fill the buffer, going to new */ /* records as needed. Note that records of length 20 are considered*/ /* to be continuation records and are ignored. */ /*-----------------------------------------------------------------*/ readbytes:; left=l-c+1; if left>=len then do; input buffer $varying200. len @; end; else do ii=1 to len; if c>l then do; input / @; if l=20 then input / @; end; input byte $char1. @; substr(buffer,ii,1)=byte; end; return; run; /*-----------------------------------------------------------------*/ /* The ranges will be in sorted order by range, and not necessarily*/ /* in sorted order for the labels, so we sort by label number so */ /* we can merge with the labels. */ /*-----------------------------------------------------------------*/ proc sort data=ranges; by labelnum; run; /*-----------------------------------------------------------------*/ /* Create our CNTLIN data set from the 3 data sets. */ /*-----------------------------------------------------------------*/ data cntlinx; merge ranges labels; by labelnum; if _n_=1 then set header; drop labelnum; run; /*-----------------------------------------------------------------*/ /* Append to the final CNTLIN data set and clean up. */ /*-----------------------------------------------------------------*/ proc append base=&cntlin data=cntlinx; run; proc delete data=header ranges labels cntlinx; run; /*-----------------------------------------------------------------*/ /* Added ability to create user formats to specified library RHA */ /* defaults to WORK.LIBRARY RHA */ /* Added FMTLIB option to PROC FORMAT statement JLG */ /*-----------------------------------------------------------------*/ proc format cntlin=&cntlin fmtlib library=&fmtlib..&fmtcat.; run; %mend readmem; /*----------------------------------------------------------------*/ /* CALL MACRO; Modify parameter list or take defaults */ /*----------------------------------------------------------------*/ %convert_fmtlib(v5fmtlib=SASLIBX, cntlin=CNTLIN, fmtlib=WORK, fmtcat=formats)