Scenario Analysis with Rich Regression Effects and BY Groups

/*--------------------------------------------------------------

                    SAS Sample Library

        Name: hcdmex03.sas
 Description: Example program from SAS/ETS High Performance
              Procedures User's Guide, The HPCDM Procedure
       Title: Scenario Analysis with Rich Regression Effects and BY Groups
     Product: SAS High Performance Econometrics Software
        Keys: Compound Distribution Modeling,
              Aggregate Loss Modeling
        PROC: HPCDM
       Notes:
--------------------------------------------------------------*/

%let npolicy=5000;

%macro generateRegvars;
   age = MAX(int(rand('NORMAL', 35, 15)),16)/50;

   if (rand('UNIFORM') < 0.5) then do;
      genderNum = 1; * female;
      gender = 'F';
   end;
   else do;
      genderNum = 2; * male;
      gender = 'M';
   end;

   if (rand('UNIFORM') < 0.7) then do;
      carTypeNum = 1; * sedan;
      carType = 'Sedan';
   end;
   else do;
      carTypeNum = 2; * SUV;
      carType = 'SUV';
   end;

   educationLevel = rand('UNIFORM');
   if (educationLevel < 0.5) then do;
      educationNum = 1; *high school graduate;
      education = 'High School';
   end;
   else if (educationLevel < 0.85) then do;
      educationNum = 2; *college graduate;
      education = 'College';
   end;
   else do;
      educationNum = 3; *advanced degree;
      education = 'Advanced Degree';
   end;

   annualmiles = MAX(1000, int(rand('NORMAL', 12000, 5000)))/5000;

   carSafety = rand('UNIFORM'); * scaled to be between 0 & 1 *;

   income = MAX(15000,int(rand('NORMAL', educationNum*30000, 50000)))/100000;
%mend;

data losses(keep=region policyholderId age gender carType
                 annualmiles education carSafety
                 income lossAmount noloss);
   call streaminit(12345);
   array cx{6} age genderF milesSUV milesSedan educationAdv educationColl;
   array cbeta{7} _TEMPORARY_ (1 0.75 -1 -1.2 -0.6 0.5 0.75); * first one is for intercept;

   array czx{4} age carTypeSUV educationAdv educationColl;
   array czbeta{5} _TEMPORARY_ (-0.75 -1 -1.2 0.6 0.7); * first one is for intercept;

   array sx{8} _temporary_;
   array sbeta{9} _TEMPORARY_ (5 0.5 1.15 -0.75 -0.3 0.4 0.7 -0.5 -0.3); * first one is for intercept;

   length gender $1 carType $8 education $16;

   alpha = 0.9; theta = 1/alpha;

   do iregion=1 to 2;
      if (iregion = 1) then do;
         region = 'East';
         sigma = 0.5;
      end;
      else do;
         region = 'West';
         /* slightly change parameter estimates for this group */
         do i=1 to dim(cbeta);
           cbeta(i) = cbeta(i) + 0.1;
         end;
         do i=1 to dim(czbeta);
           czbeta(i) = czbeta(i) + 0.1;
         end;
         do i=1 to dim(sbeta);
           sbeta(i) = sbeta(i) + 0.05;
         end;
         sigma = 0.3;
         alpha = 1.6; theta = 1/alpha;
      end;

      do policyholderId=1 to &npolicy;
         * simulate policyholder and vehicle attributes *;
         %generateRegvars;

         do i=1 to dim(sx);
            sx(i) = 0;
         end;
         if (carTypeNum = 2) then do;
            sx(1) = 1;
            carTypeSUV = 1;
            milesSUV = annualmiles;
            milesSedan = 0;
         end;
         else do;
            carTypeSUV = 0;
            milesSUV = 0;
            milesSedan = annualmiles;
         end;

         if (genderNum = 1) then do;
            sx(2) = 1;
            genderF = 1;
         end;
         else genderF = 0;

         sx(3) = carSafety;
         sx(4) = income;
         if (educationNum = 3 and carTypeNum = 2) then sx(5) = 1;
         if (educationNum = 2 and carTypeNum = 2) then sx(6) = 1;
         if (educationNum = 3 and carTypeNum = 1) then sx(7) = 1;
         if (educationNum = 2 and carTypeNum = 1) then sx(8) = 1;

         if (educationNum = 3) then do;
            educationAdv = 1;
            educationColl = 0;
         end;
         else do;
            educationAdv = 0;
            if (educationNum = 2) then educationColl = 1;
            else educationColl = 0;
         end;

         * simulate number of losses incurred by this policyholder *;
         czxbeta = czbeta(1);
         do i=1 to dim(czx);
            czxbeta = czxbeta + czx(i) * czbeta(i+1);
         end;
         phi = exp(czxbeta)/(1+exp(czxbeta));

         if (rand('UNIFORM') < phi) then do;
            * zero process is selected with probability phi;
            numloss = 0;
         end;
         else do;
            * negbin process is selected with probability (1-phi);
            cxbeta = cbeta(1);
            do i=1 to dim(cx);
              cxbeta = cxbeta + cx(i) * cbeta(i+1);
            end;
            Mu = exp(cxbeta);
            p = theta/(Mu+theta);
            numloss = rand('NEGB',p,theta);
         end;

         * simulate severity of each of the losses *;
         if (numloss > 0) then do;
            noloss = 0;
            Mu = sbeta(1);
            do i=1 to dim(sx);
               Mu = Mu + sx(i) * sbeta(i+1);
            end;
            do iloss=1 to numloss;
               lossAmount = exp(Mu) * rand('LOGNORMAL')**Sigma;
               output;
            end;
         end;
         else do;
            noloss = 1;
            lossAmount = .;
            output;
         end;
      end;
   end;
run;

proc sort data=losses out=lossesByPolicy;
   by region policyholderId;
run;

data losscounts(keep=region numloss age gender carType annualmiles education income);
   set lossesByPolicy;
   by region policyholderId;
   retain numloss zeroloss 0;
   if (noloss eq 1) then
      zeroloss = zeroloss + 1;
   else
      numloss = numloss + 1;
   if (last.policyholderId) then do;
      output;
      numloss = 0;
   end;
run;

*** create scenario for internal count simulation *;
data scenario(keep=region policyholderId age gender
                   carType annualmiles education carSafety income);
   call streaminit(456);

   length region $4 gender $1 carType $8 education $16;

   do iregion=1 to 2;
      if (iregion = 1) then do;
         region = 'East';
         npolicy = 3;
      end;
      else do;
         region = 'West';
         npolicy = 5;
      end;
      do policyholderId = 1 to npolicy;
         %generateRegvars;
         output;
      end;
    end;
run;

proc severity data=losses(where=(not(missing(lossAmount))))
        covout outstore=work.sevstore print=all plots=none;
    by region;
    loss lossAmount;
    class carType gender education;
    scalemodel carType gender carSafety income education*carType
               income*gender carSafety*income;
    dist logn burr;
run;

proc countreg data=losscounts covout;
   by region;
   class gender carType education;
   model numloss = age income gender carType*annualmiles education / dist=negbin;
   zeromodel numloss ~ age income carType education;
   store cstore;
run;

/* Re-fit models after removing insignificant effects. */
proc severity data=losses(where=(not(missing(lossAmount))))
        covout outstore=work.sevstore print=all plots=none;
    by region;
    loss lossAmount;
    class carType gender education;
    scalemodel carType gender carSafety income education*carType;
    dist logn burr;
run;

proc countreg data=losscounts covout;
   by region;
   class gender carType education;
   model numloss = age gender carType*annualmiles education / dist=negbin;
   zeromodel numloss ~ age carType education;
   store cstore;
run;

proc hpcdm data=scenario nreplicates=10000 seed=123 print=all
           severitystore=work.sevstore countstore=work.cstore
           nperturb=30;
   by region;
   severitymodel logn;
   outsum out=agglossStats mean stddev skewness kurtosis pctlpts=(90 97.5 99.5);
run;