%MM_Countreg_Create_Scorecode Autocall Macro

Generates score code for a model that is created by the COUNTREG procedure.

Syntax

Arguments

ParamEst=countreg-parameter-estimate-dataset

specifies the name of the ODS output data set that contains the parameter estimates. This ParameterEstimates data set is created when PROC COUNTREG executes. To capture this data set, use the ODS OUTPUT statement before PROC COUNTREG executes.

Tip In the PROC COUNTREG code, include the PRED= and the PROBZERO= options in the OUTPUT statement.

FileRef=output-fileref

specifies the fileref that defines the location of the macro output files.

Default The SAS log

PredPrefix=dependent-variable-prefix

specifies a prefix for the predicted dependent variable. The variable is named in the PRED= option of the PROC COUNTREG OUTPUT statement. When this prefix is applied to the dependent variable, this new name becomes the prediction variable.

Default P_

PZPrefix=probability-zero-variable-prefix

specifies a prefix for the variable that indicates the probability that the response variable will take on the value of zero as a result of the zero-generating process. The variable is named in the PROBZERO= option of the PROC COUNTREG OUTPUT statement. When the prefix is applied to the dependent variable, this new name becomes the probability zero variable.

Default PHI_

Details

To create score code for a model that you create with PROC COUNTREG, include the following SAS code:
  1. Use a LIBNAME statement to identify the location of the output that you create using PROC COUNTREG.
  2. Before PROC COUNTREG, use the ODS OUTPUT statement to capture the ParameterEstimates output data set. Here is an example:
    ods output ParameterEstimates=CntReg.ParameterEstimates;
  3. Build your model using PROC COUNTREG and close the ODS OUTPUT destination.
  4. Use the FILENAME statement to define a fileref for the macro output location.
  5. Invoke the %mm_countreg_create_scorecode macro.
  6. Execute the score code within a DATA step.

Example: Generate the PROC COUNTREG Score Code for Insurance Risk

Create the Sample Insurance Data

The following SAS program creates sample data that resembles an automobile policy history file for a property and casualty insurance program:
/* Root folder for the example. Replace this placeholder with a folder  */
/* that you can write to; it is assigned once so that no later %LET     */
/* silently overrides it.                                               */
%let MyProj = C:\Users\myID;

/* Library that holds the sample data, the format catalog, and the      */
/* model output.                                                        */
libname CntReg "&MyProj.\CountReg\Test";

/* Add the CntReg.formats catalog to the format search path so that the */
/* user-defined formats below can be resolved.                          */
options fmtsearch = (CntReg.formats);

/* Define the display formats for Gender and HomeOwner in the CntReg    */
/* library. CNTLOUT= also writes the format definitions to the data set */
/* phf_fmt.                                                             */
proc format library = CntReg cntlout = phf_fmt;
   value $ Gender_fmt
   'Male' = 'Man'
   'Female' = 'Woman';
   value HO_fmt
   0 = 'No'
   1 = 'Yes';
run;
  
/* Simulate the sample policy history file CntReg.phf.                   */
/* One observation is written for every combination of policy (PolicyId) */
/* and exposure year (ExpYr). The claim count nClaim is zero when the    */
/* Bernoulli draw on phi is 1, and is otherwise drawn from a Poisson     */
/* distribution with mean exp(mu).                                       */
data CntReg.phf;
   length CarType $ 5;
   label CarType = 'Type of Car';
   length Gender $ 6;
   format Gender $ Gender_fmt.;
   label Gender = 'Gender Identification';
  
   /* This variable name will test how the macro will resolve name conflicts */
   length Estimate $ 6;
   label Estimate = 'Gender Identification (Copy)';
   label AgeDriver = 'Driver Age';
   format HomeOwner HO_fmt.;
   /* Fixed seed: every RAND call below draws from this one stream, so   */
   /* the order of the RAND calls determines the generated data.         */
   call streaminit(27513);
  
   do PolicyId = 00001 to 99999;
     /* Start year is 2000 plus an index drawn from ten equally weighted */
     /* table entries.                                                   */
     StartYr = 2000 + 
               rand('table', 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1);
       do ExpYr = StartYr to 2011;
          EExp = rand('uniform');
          MyOffset = 0;
          /* Car type: entries 1-3 assign a type and its effect on mu;   */
          /* any other result (weight 0.003) leaves CarType blank and    */
          /* does not reassign fCarType.                                 */
          select (rand('table', 0.499, 0.299, 0.199, 0.003));
             when (1)
             do;
                CarType = 'SEDAN';
                fCarType = 0;
             end;
             when (2)
             do;
                CarType = 'TRUCK';
                fCarType = 0.5;
             end;
             when (3)
             do;
                CarType = 'SPORT';
                fCarType = 1.0;
             end;
             otherwise CarType = ' ';
       end;
  
     AgeDriver = 18 + rand('binomial',0.375, 72);
     fAgeDriver = 0.0123 * (AgeDriver - 17);
  
     HomeOwner = rand('bernoulli', 0.25);
     if (HomeOwner eq 0) then fHomeOwner = 0.7;
        else if (HomeOwner eq 1) then fHomeOwner = 0;
     /* IS takes one of the values -2.5, -1.5, ..., 2.5.                 */
     /* NOTE(review): IS and fIS are assigned only when HomeOwner is 1.  */
     /* Inside this explicit DO loop they are not reset, so the other    */
     /* rows carry the values from the most recent assignment - confirm  */
     /* that this is intended.                                           */
     if (HomeOwner eq 1) then
        do;
           IS = round(rand('uniform') * 5) - 2.5;
           fIS = -0.0456 * IS * IS;
        end;     
     /* Gender is derived from EExp: below 0.5 is Male, from 0.5 up to   */
     /* 0.9 is Female, otherwise blank.                                  */
     if (EExp lt 0.5) then
        do;
           Gender = 'Male';
           fGender = 0;
        end;
        else if (EExp lt 0.9) then
           do;
              Gender = 'Female';
              fGender = -1.5;
           end;
               else Gender = ' ';
     /* Estimate is a copy of Gender (see the name-conflict note above). */
     Estimate = Gender;
     /* Zero model: phi is the standard normal CDF evaluated at mu_zero. */
     if (missing(HomeOwner) eq 0 and missing(IS) eq 0) 
        then mu_zero = 0.987 + fHomeOwner + fIS;
        else mu_zero = 0.987;
     phi = cdf('normal', mu_zero, 0, 1);
     /* Count model: used only when the Bernoulli draw on phi is 0. The  */
     /* predictor effects enter mu only when CarType, AgeDriver, and     */
     /* Gender are all nonmissing.                                       */
     if (rand('bernoulli', phi) eq 0) then
       do;
         if (missing(CarType) eq 0 and missing(AgeDriver) eq 0 and 
             missing(Gender) eq 0) 
            then mu = 2 + fCarType + fAgeDriver + fGender;
            else mu = 2;
         nClaim = rand('poisson', exp(mu));
       end;
       else nClaim = 0;
     output;
   end;
   end;
   /* Remove the helper variables. fIS and phi are not listed here, so   */
   /* they remain in CntReg.phf.                                         */
   drop fCarType fAgeDriver fHomeOwner fGender;
   drop mu_zero mu;
run;

Run the Sample Program

Here is the sample program:
/* Identifier of the model being built (1 = original model). It is      */
/* appended to the names of the data sets and of the score code file.   */
%let model = 1;

/* Project location, library, and format search path */
%let MyProj = C:\Users\emdev;
libname CntReg "&MyProj.\CountReg\Test";
options fmtsearch=(CntReg.formats);

/* Capture the ParameterEstimates ODS table that PROC COUNTREG produces */
ods output ParameterEstimates=CntReg.ParameterEstimates_&model.;

/* Fit the model: a Poisson count model with a normal-link zero model.  */
/* The OUTPUT statement saves the predicted count and the zero          */
/* probability for later comparison with the generated score code.      */
proc countreg data=CntReg.phf;
   class CarType Gender HomeOwner;
   model nClaim = CarType AgeDriver Gender
         / dist=poisson;
   zeromodel nClaim ~ HomeOwner IS*IS
         / link=normal;
   output out=CntReg.phf_pred_&model.
          predicted=Pred_nClaim
          probzero=Phi_nClaim;
run;

/* Stop capturing ODS output */
ods output close;
  
  
/* Define the fileref for the output syntax. The macro writes the       */
/* generated score code to this file.                                   */
filename ThisFile "&MyProj.\CountReg\Test\ScoreCode_&Model..sas";

/* Invoke the macro.                                                    */
/*   ParamEst   - ParameterEstimates data set captured by ODS OUTPUT    */
/*   FileRef    - fileref that receives the generated score code        */
/*   PredPrefix - prefix for the predicted-value variable               */
/*   PZPrefix   - prefix for the probability-of-zero variable           */
%mm_countreg_create_scorecode(
   ParamEst = CntReg.ParameterEstimates_&Model.,
   FileRef = ThisFile,
   PredPrefix = MyPred_,
   PZPrefix = MyPhi_
);
   
/* Run the generated score code against the PROC COUNTREG output data   */
/* and compare the scored values with the values from the procedure.    */
data CntReg.phf_pred_compare;
   set CntReg.phf_pred_&Model.;

   /* Insert the generated score code into this DATA step */
   %include ThisFile;

   /* Missing-value indicators: 1 = missing, 0 = not missing */
   IsMiss_Pred_nClaim   = missing(Pred_nClaim);
   IsMiss_Phi_nClaim    = missing(Phi_nClaim);
   IsMiss_MyPred_nClaim = missing(MyPred_nClaim);
   IsMiss_MyPhi_nClaim  = missing(MyPhi_nClaim);

   /* Compute each difference only when both of its values are present */
   if not (IsMiss_Pred_nClaim or IsMiss_MyPred_nClaim) then
      MyDiffPred = MyPred_nClaim - Pred_nClaim;
   if not (IsMiss_Phi_nClaim or IsMiss_MyPhi_nClaim) then
      MyDiffPhi = MyPhi_nClaim - Phi_nClaim;
run;
   
/* List the attributes of the comparison data set */
proc contents data=CntReg.phf_pred_compare;
run;

/* Tabulate the _WARN_ values that the score code assigned */
proc freq data=CntReg.phf_pred_compare;
   tables _WARN_;
run;

/* Summarize the procedure values, the score code values, and their     */
/* differences. If the score code works correctly, then MyDiffPred and  */
/* MyDiffPhi should be constant variables of all zero values.           */
proc tabulate data=CntReg.phf_pred_compare;
   class IsMiss_Pred_nClaim IsMiss_MyPred_nClaim
         IsMiss_Phi_nClaim IsMiss_MyPhi_nClaim;
   var Pred_nClaim MyPred_nClaim MyDiffPred
       Phi_nClaim MyPhi_nClaim MyDiffPhi;

   /* Predicted value: PROC COUNTREG compared with the score code */
   table IsMiss_Pred_nClaim * IsMiss_MyPred_nClaim *
            (n nmiss mean*f=e22. stddev*f=e22. min*f=e22. max*f=e22.),
         (Pred_nClaim MyPred_nClaim MyDiffPred);

   /* Zero probability: PROC COUNTREG compared with the score code */
   table IsMiss_Phi_nClaim * IsMiss_MyPhi_nClaim *
            (n nmiss mean*f=e22. stddev*f=e22. min*f=e22. max*f=e22.),
         (Phi_nClaim MyPhi_nClaim MyDiffPhi);
run;
quit;

The Generated Score Code and Output Tables

Generated Score Code
/**********************************************************************/
/* Begin scoring code for COUNTREG                                    */
/* Model: ZIP                                                         */
/* Created By: emdev                                                  */
/* Date: April 26, 2013                                               */
/* Time: 09:27:39                                                     */
/**********************************************************************/

/* _WARN_ is a 4-character flag string. Further below, position 1 is  */
/* set to 'M' when an input is missing and position 2 is set to 'U'   */
/* when a CLASS value is not one of the listed levels.                */
LENGTH _WARN_ $ 4;
_WARN_ = '    ';
LABEL _WARN_ = "Warnings" ;

/* Number of inputs that are missing in the current record */
_nInputMiss = 0;

/**********************************************************************/
/* Check the continuous predictors                                    */
/**********************************************************************/

IF ( MISSING( AgeDriver ) EQ 1 ) THEN _nInputMiss = _nInputMiss + 1;

IF ( MISSING( IS ) EQ 1 ) THEN _nInputMiss = _nInputMiss + 1;

/* Number of CLASS inputs whose formatted value is not a listed level */
_nInputOutRange = 0;

/**********************************************************************/
/* Check the CLASS predictors                                         */
/**********************************************************************/

/* Each _UFormat_n holds the formatted value of one CLASS variable.   */
/* A nonmissing value is compared with the levels in the NOTIN list;  */
/* a missing value is counted in _nInputMiss.                         */
LENGTH _UFormat_1 $ 5 ;
LABEL _UFormat_1 = "Formatted Value of CarType" ;
IF ( MISSING( CarType ) EQ 0 ) THEN DO;
   _UFormat_1 = STRIP( PUT( CarType , $5. ) );
   IF ( _UFormat_1
        NOTIN ( "SEDAN"
              , "SPORT"
              , "TRUCK"
              )
      ) THEN _nInputOutRange = _nInputOutRange + 1;
END;
ELSE _nInputMiss = _nInputMiss + 1;

LENGTH _UFormat_2 $ 5 ;
LABEL _UFormat_2 = "Formatted Value of Gender" ;
IF ( MISSING( Gender ) EQ 0 ) THEN DO;
   _UFormat_2 = STRIP( PUT( Gender , $GENDER_FMT5. ) );
   IF ( _UFormat_2
        NOTIN ( "Man"
              , "Woman"
              )
      ) THEN _nInputOutRange = _nInputOutRange + 1;
END;
ELSE _nInputMiss = _nInputMiss + 1;
LENGTH _UFormat_3 $ 3 ;
LABEL _UFormat_3 = "Formatted Value of HomeOwner" ;
IF ( MISSING( HomeOwner ) EQ 0 ) THEN DO;
   _UFormat_3 = STRIP( PUT( HomeOwner , HO_FMT3. ) );
   IF ( _UFormat_3
        NOTIN ( "No"
              , "Yes"
              )
      ) THEN _nInputOutRange = _nInputOutRange + 1;
END;
ELSE _nInputMiss = _nInputMiss + 1;

/**********************************************************************/
/* Set _WARN_ value                                                   */
/**********************************************************************/

_VALID2SCORE = 1;
LABEL _VALID2SCORE = "Is this record valid to be scored? 1=Yes, 0=No" ;

IF ( _nInputMiss GT 0 ) THEN DO;
   SUBSTR(_WARN_,1,1) = 'M';
   _VALID2SCORE = 0;
END;
IF ( _nInputOutRange GT 0 ) THEN DO;
   SUBSTR(_WARN_,2,1) = 'U';
   _VALID2SCORE = 0;
END;

/**********************************************************************/
/* Calculate scores only if current record contains valid values      */
/**********************************************************************/

IF ( _VALID2SCORE EQ 1 ) THEN DO;

   /* _NU_MODEL and _NU_ZEROMODEL accumulate the linear predictors.   */
   /* Each starts at zero; a constant term is added first, then one   */
   /* term per CLASS level or continuous effect.                      */
   _NU_MODEL = 0 ;
   _NU_ZEROMODEL = 0 ;

   _NU_MODEL = _NU_MODEL + 7.889048183464800E-01
      ;

   IF ( _UFormat_1 EQ "SEDAN"
      ) THEN DO;
      _NU_MODEL = _NU_MODEL - 4.983426513164500E-01
      ;
   END;

   IF ( _UFormat_1 EQ "SPORT"
      ) THEN DO;
      _NU_MODEL = _NU_MODEL + 4.985885591940500E-01
      ;
   END;

   _NU_MODEL = _NU_MODEL + 1.227923016048900E-02
         * AgeDriver
      ;

   IF ( _UFormat_2 EQ "Man"
      ) THEN DO;
      _NU_MODEL = _NU_MODEL + 1.503894036936300E+00
      ;
   END;

   _NU_ZEROMODEL = _NU_ZEROMODEL + 9.925866013120000E-01
      ;
   IF ( _UFormat_3 EQ "No"
      ) THEN DO;
      _NU_ZEROMODEL = _NU_ZEROMODEL + 6.905739218180000E-01
      ;
   END;

   _NU_ZEROMODEL = _NU_ZEROMODEL - 4.346588113784800E-02
         * IS
         * IS
      ;

   /* Log of the standard normal survival function at _NU_ZEROMODEL,  */
   /* that is, the log of one minus the zero probability.             */
   _LOG_TAIL_P_ = LOGSDF( 'NORMAL' , _NU_ZEROMODEL );

   /* The prediction is EXP of the count predictor plus the log tail  */
   /* probability. NOTE(review): 709.780 is presumably a guard        */
   /* against overflow in EXP - confirm.                              */
   IF ( (_NU_MODEL + _LOG_TAIL_P_) LE 709.780 ) 
   THEN MyPred_nClaim = EXP( _NU_MODEL + _LOG_TAIL_P_ );
   ELSE MyPred_nClaim = .;

   MyPhi_nClaim = 1 - EXP( _LOG_TAIL_P_ );

END;   /* END (_VALID2SCORE EQ 1) IF BLOCK */

LABEL MyPred_nClaim = "Predicted value of nClaim" ;
LABEL MyPhi_nClaim = "Probability of nClaim being zero as a result 
of the zero-generating process" ;

/* Drop the work variables; _WARN_ and the two score variables are    */
/* not listed and therefore stay in the output data.                  */
DROP _nInputMiss _VALID2SCORE _NU_MODEL;
DROP _NU_ZEROMODEL _LOG_TAIL_P_;
DROP _nInputOutRange
     _UFormat_1
     _UFormat_2
     _UFormat_3
     ;

/**********************************************************************/
/* End scoring code for COUNTREG                                      */
/**********************************************************************/
The Tables Created by the Sample Program
PROC COUNTREG Class Level Information
PROC COUNTREG Model Fit Summary
PROC COUNTREG Class Level Information
PROC COUNTREG Model Fit Summary
PROC COUNTREG Parameter Estimates
Sample Data Variable Attributes
PROC CONTENTS Output for Score Code Output Data Set
PROC CONTENTS Engine and Host Dependent Information for Score Code Output Data Set
PROC CONTENTS Alphabetic List of Variables and Attributes for Score Code Output Data Set
Warnings Table from PROC FREQ Output
Output Table 1 from PROC TABULATE
Output Table 2 from PROC TABULATE