Getting Started with PROC HPCDM

/*--------------------------------------------------------------

                    SAS Sample Library

        Name: hcdmgs.sas
 Description: Example program from SAS/ETS High Performance
              Procedures User's Guide, The HPCDM Procedure
       Title: Getting Started with PROC HPCDM
     Product: SAS High Performance Econometrics Software
        Keys: Compound Distribution Modeling,
              Aggregate Loss Modeling
        PROC: HPCDM
       Notes:

--------------------------------------------------------------*/

/* Simulate data for an intercept-only Poisson count model */
data claimcount(keep=numLosses);
   call streaminit(12345);
   label numLosses='Number of Loss Events in a Year';
   Lambda = 2;
   do n = 1 to 500;
      numLosses = rand('POISSON',Lambda);
      output;
   end;
run;

/* Simulate data for gamma severity model */
data claimsev(keep=lossValue);
   call streaminit(67890);
   label y='Severity of a Loss Event';
   Theta = 1000;
   Alpha = 2;
   do n = 1 to 500;
      lossValue = quantile('Gamma', rand('UNIFORM'), Alpha, Theta);
      output;
   end;
run;

/* Fit an intercept-only Poisson count model and
   write estimates to an item store */
proc countreg data=claimcount;
   model numLosses= / dist=poisson;
   store countStorePoisson;
run;

/* Fit severity models and write estimates to a data set */
proc severity data=claimsev criterion=aicc outest=sevest covout plots=none;
   loss lossValue;
   dist _predefined_;
run;

/* Simulate and estimate Poisson-gamma compound distribution model */
proc hpcdm countstore=countStorePoisson severityest=sevest
           seed=13579 nreplicates=10000 plots=(edf(alpha=0.05) density)
           print=(summarystatistics percentiles);
   severitymodel gamma;
   output out=aggregateLossSample samplevar=aggloss;
   outsum out=aggregateLossSummary mean stddev skewness kurtosis
          p01 p05 p95 p995=var pctlpts=90 97.5;
run;

/* Conduct parameter perturbation analysis of
   the Poisson-gamma compound distribution model */
proc hpcdm countstore=countStorePoisson severityest=sevest
           seed=13579 nreplicates=10000 nperturbedsamples=30
           print(only)=(perturbsummary) plots=none;
   severitymodel gamma;
   output out=aggregateLossSample samplevar=aggloss;
   outsum out=aggregateLossSummary mean stddev skewness kurtosis
          p01 p05 p95 p995=var pctlpts=90 97.5;
run;

/* Simulate data for losses incurred by several policyholders
   in some period of time */
data losses(keep=policyholderId age gender carType annualMiles
                 education carSafety income noloss lossamount);
   call streaminit(12345);
   array cx{5} age gender carType annualMiles education;
   array cbeta{6} _TEMPORARY_ (1 -0.75 1 0.6 -1 -0.25);

   array sx{3} carType carSafety income;
   array sbeta{4} _TEMPORARY_ (3.5 1.5 -0.8 0.6);

   alpha = 1/3; theta = 1/alpha;
   Sigma = 1;
   do policyholderId=1 to 5000;
      /* simulate policyholder and vehicle attributes */
      age = MAX(int(rand('NORMAL', 35, 15)),16)/50;

      if (rand('UNIFORM') < 0.5) then gender = 1; * female;
      else gender = 2; * male;

      if (rand('UNIFORM') < 0.7) then carType = 1; * sedan;
      else carType = 2; * SUV;

      annualMiles = MAX(1000, int(rand('NORMAL', 12000, 5000)))/5000;

      educationLevel = rand('UNIFORM');
      if (educationLevel < 0.5) then education = 1; *high school graduate;
      else if (educationLevel < 0.85) then education = 2; *college graduate;
      else education = 3; *advanced degree;

      carSafety = rand('UNIFORM'); /* scaled to be between 0 & 1 */

      income = MAX(15000,int(rand('NORMAL', education*30000, 50000)))/100000;

      /* simulate number of losses incurred by this policyholder */
      cxbeta = cbeta(1);
      do i=1 to dim(cx);
         cxbeta = cxbeta + cx(i) * cbeta(i+1);
      end;
      Mu = exp(cxbeta);
      p = theta/(Mu+theta);
      numloss = rand('NEGB',p,theta);

      /* simulate severity of each of the losses */
      if (numloss > 0) then do;
         noloss = 0;
         do iloss=1 to numloss;
            Mu = sbeta(1);
            do i=1 to dim(sx);
               Mu = Mu + sx(i) * sbeta(i+1);
            end;
            lossamount = exp(Mu) * rand('LOGNORMAL')**Sigma;
            output;
         end;
      end;
      else do;
         noloss = 1;
         lossamount = .;
         output;
      end;
   end;
run;

/* Aggregate number of losses for each policyholder */
data losscounts(keep=age gender carType annualMiles education numloss);
   set losses;
   by policyholderId;
   retain numloss 0;
   if (noloss ne 1) then
      numloss = numloss + 1;
   if (last.policyholderId) then do;
      output;
      numloss = 0;
   end;
run;

/* Fit negative binomial frequency model for the number of losses */
proc countreg data=losscounts;
   model numloss = age gender carType annualMiles education / dist=negbin;
   store work.countregmodel;
run;

/* Examine the parameter estimates for the model in the item store */
proc countreg restore=work.countregmodel;
   show parameters;
run;

/* Fit severity models for the magnitude of losses */
proc severity data=losses plots=none outest=work.sevregest print=all;
   loss lossamount;
   scalemodel carType carSafety income;
   dist _predef_;
   nloptions maxiter=100;
run;

/* Generate the scenario data set for single policyholder */
data singlePolicy(keep=age gender carType annualMiles
                       education carSafety income);
   call streaminit(67897);

   age = MAX(int(rand('NORMAL', 35, 15)),16)/50;

   if (rand('UNIFORM') < 0.5) then gender = 1; * female;
   else gender = 2; * male;

   if (rand('UNIFORM') < 0.7) then carType = 1; * sedan;
   else carType = 2; * SUV;

   annualMiles = MAX(1000, int(rand('NORMAL', 12000, 5000)))/5000;

   educationLevel = rand('UNIFORM');
   if (educationLevel < 0.5) then education = 1; *high school graduate;
   else if (educationLevel < 0.85) then education = 2; *college graduate;
   else education = 3; *advanced degree;

   carSafety = rand('UNIFORM'); /* scaled to be between 0 & 1 */

   income = MAX(15000,int(rand('NORMAL', education*30000, 50000)))/100000;

   output;
run;

/* Simulate the aggregate loss distribution for the scenario
   with single policyholder */
proc hpcdm data=singlePolicy nreplicates=10000 seed=13579 print=all
           countstore=work.countregmodel severityest=work.sevregest;
   severitymodel logn gpd;
   outsum out=onepolicysum mean stddev skew kurtosis median
         pctlpts=97.5 to 99.5 by 1;
run;

/* Generate the scenario data set for multiple policyholders */
data groupOfPolicies(keep=policyholderId age gender carType annualMiles
                          education carSafety income);
   call streaminit(67897);

   do policyholderId=1 to 5;
      age = MAX(int(rand('NORMAL', 35, 15)),16)/50;

      if (rand('UNIFORM') < 0.5) then gender = 1; * female;
      else gender = 2; * male;

      if (rand('UNIFORM') < 0.7) then carType = 1; * sedan;
      else carType = 2; * SUV;

      annualMiles = MAX(1000, int(rand('NORMAL', 12000, 5000)))/5000;

      educationLevel = rand('UNIFORM');
      if (educationLevel < 0.5) then education = 1; *high school graduate;
      else if (educationLevel < 0.85) then education = 2; *college graduate;
      else education = 3; *advanced degree;

      carSafety = rand('UNIFORM'); /* scaled to be between 0 & 1 */

      income = MAX(15000,int(rand('NORMAL', education*30000, 50000)))/100000;

      output;
   end;
run;

/* Simulate the aggregate loss distribution for the scenario
   with multiple policyholders */
proc hpcdm data=groupOfPolicies nreplicates=10000 seed=13579 print=all
           countstore=work.countregmodel severityest=work.sevregest
           plots=(conditionaldensity(rightq=0.95)) nperturbedSamples=50;
   severitymodel logn gpd;
   outsum out=multipolicysum mean stddev skew kurtosis median
         pctlpts=97.5 to 99.5 by 1;
run;