Using Externally Simulated Count Data

/*--------------------------------------------------------------

                    SAS Sample Library

        Name: hcdmex02.sas
 Description: Example program from SAS/ETS High Performance
              Procedures User's Guide, The HPCDM Procedure
       Title: Using Externally Simulated Count Data
     Product: SAS High Performance Econometrics Software
        Keys: Compound Distribution Modeling,
              Aggregate Loss Modeling
        PROC: HPCDM
       Notes:
--------------------------------------------------------------*/

/* Simulate count data */
data cntdataex2(keep=line corpKRI1 corpKRI2
                     cbKRI1 cbKRI2 cbKRI3 rbKRI1 rbKRI2 numloss);
   call streaminit(12345);
   array cx{7} corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3 rbKRI1 rbKRI2;
   array cbetaR{8} _TEMPORARY_ (0.35 1 0 0 0 0 0.5 0.25);
   array cbetaC{8} _TEMPORARY_ (0.9 0.75 0.3 0.1 0.25 0.5 0 0);

   alpha = 0.3;
   theta = 1/alpha;
   do obs=1 to 5000;
      do i=1 to dim(cx);
         cx(i) = rand('NORMAL');
      end;

      line = 'CommercialBanking';
      xbeta = cbetaC(1);
      do i=1 to dim(cx);
         xbeta = xbeta + cx(i) * cbetaC(i+1);
      end;
      Mu = exp(xbeta);
      p = theta/(Mu+theta);
      numloss = rand('NEGB',p,theta);
      output;

      line = 'RetailBanking';
      xbeta = cbetaR(1);
      do i=1 to dim(cx);
         xbeta = xbeta + cx(i) * cbetaR(i+1);
      end;
      lambda = exp(xbeta);
      numloss = rand('POISSON', lambda);
      output;
   end;
run;

proc sort data=cntdataex2;
   by line;
run;

/* Fit negative binomial (p=2) regression model for each business line */
proc countreg data=cntdataex2 outest=countEstEx2NB2;
   by line;
   model numloss = corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3
                   rbKRI1 rbKRI2 / dist=negbin;
run;

/* Fit Poisson regression model for each business line */
proc countreg data=cntdataex2 outest=countEstEx2Poisson;
   by line;
   model numloss = corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3
                   rbKRI1 rbKRI2 / dist=poisson;
run;

/* Simulate severity data */
data sevdataEx2(keep=line corpKRI1 corpKRI2 cbKRI2 rbKRI1 rbKRI3 lossValue);
   array sx{5} corpKRI1 corpKRI2 cbKRI2 rbKRI1 rbKRI3;
   array sbetaC{6} _TEMPORARY_ (5 1 0 0.3 0 0);
   array sbetaR{6} _TEMPORARY_ (3.5 0 0.5 0 -0.8 0.6);
   if (_n_=1) then call streaminit(67890);

   set cntdataex2(keep=line numloss corpKRI1 corpKRI2 cbKRI2 rbKRI1);
   sigma = 1;
   alpha = 2.5;
   if (numloss > 0) then do;
      sx(5) = rand('NORMAL'); /* simulate rbKRI3 value */

      if (line='CommercialBanking') then do;
         /* lognormal */
         Mu = sbetaC(1);
         do i=1 to dim(sx);
            Mu = Mu + sx(i) * sbetaC(i+1);
         end;
         lossValue = exp(Mu) * rand('LOGNORMAL')**Sigma;
      end;
      else do;
         /* gamma */
         Mu = sbetaR(1);
         do i=1 to dim(sx);
            Mu = Mu + sx(i) * sbetaR(i+1);
         end;
         lossValue = quantile('Gamma', rand('UNIFORM'), Alpha, exp(Mu));
      end;
      output;
   end;
run;

/* Fit severity models for each business line */
proc severity data=sevdataEx2 outest=sevestEx2 plots=none;
   by line;
   loss lossValue;
   scalemodel corpKRI1 corpKRI2 cbKRI2 rbKRI1 rbKRI3;
   dist logn gamma;
run;

/* Generate a scenario data set for a single operating condition */
data singleScenario (keep=corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3
                          rbKRI1 rbKRI2 rbKRI3);
   array x{8} corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3 rbKRI1 rbKRI2 rbKRI3;
   call streaminit(5151);
   do i=1 to dim(x);
      x(i) = rand('NORMAL');
   end;
   output;
run;

/* Simulate multiple replications of the number of loss events that
   you can expect in the scenario being analyzed */
data lossCounts1 (keep=line corpKRI1 corpKRI2 cbKRI2 rbKRI1 rbKRI3 numloss);
   array cxR{3} corpKRI1 rbKRI1 rbKRI2;
   array cbetaR{4} _TEMPORARY_;
   array cxC{5} corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3;
   array cbetaC{6} _TEMPORARY_;

   retain theta;
   if _n_ = 1 then do;
      call streaminit(5151);
      * read count model estimates *;
      set countEstEx2NB2(where=(line='CommercialBanking' and _type_='PARM'));
      cbetaC(1) = Intercept;
      do i=1 to dim(cxC);
         cbetaC(i+1) = cxC(i);
      end;
      alpha = _Alpha;
      theta = 1/alpha;

      set countEstEx2Poisson(where=(line='RetailBanking' and _type_='PARM'));
      cbetaR(1) = Intercept;
      do i=1 to dim(cxR);
         cbetaR(i+1) = cxR(i);
      end;
   end;

   set singleScenario;
   do iline=1 to 2;
      if (iline=1) then line = 'CommercialBanking';
      else line = 'RetailBanking';
      do repid=1 to 10000;
         * draw from count distribution *;
         if (iline=1) then do;
            xbeta = cbetaC(1);
            do i=1 to dim(cxC);
               xbeta = xbeta + cxC(i) * cbetaC(i+1);
            end;
            Mu = exp(xbeta);
            p = theta/(Mu+theta);
            numloss = rand('NEGB',p,theta);
         end;
         else do;
            xbeta = cbetaR(1);
            do i=1 to dim(cxR);
               xbeta = xbeta + cxR(i) * cbetaR(i+1);
            end;
            numloss = rand('POISSON', exp(xbeta));
         end;
         output;
      end;
   end;
run;

/* Keep only the best severity model for each business line
   and set coefficients of unused regressors in each model to 0 */
data sevestEx2Best;
    set sevestEx2;
    if ((line = 'CommercialBanking' and _model_ = 'Logn')) then do;
        corpKRI2 = 0; rbKRI1 = 0; rbKRI3 = 0;
        output;
    end;
    else if ((line = 'RetailBanking' and _model_ = 'Gamma')) then do;
        corpKRI1 = 0; cbKRI2 = 0;
        output;
    end;
run;

/* Estimate the distribution of the aggregate loss for both
   lines of business by using the externally simulated counts */
proc hpcdm data=lossCounts1 seed=13579 print=all
           severityest=sevestEx2Best;
   by line;
   externalcounts count=numloss;
   severitymodel logn gamma;
run;

/* Generate a scenario data set for multiple operating conditions */
data multiConditionScenario (keep=opEnvId corpKRI1 corpKRI2
      cbKRI1 cbKRI2 cbKRI3 rbKRI1 rbKRI2 rbKRI3);
   array x{8} corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3 rbKRI1 rbKRI2 rbKRI3;
   call streaminit(5151);
   do opEnvId=1 to 4;
      do i=1 to dim(x);
         x(i) = rand('NORMAL');
      end;
      output;
   end;
run;

/* Simulate multiple replications of the number of loss events that you can
   expect for all operating environments in the scenario being analyzed */
data lossCounts2 (keep=line opEnvId corpKRI1 corpKRI2 cbKRI2
                       rbKRI1 rbKRI3 repid numloss);
   array cxR{3} corpKRI1 rbKRI1 rbKRI2;
   array cbetaR{4} _TEMPORARY_;
   array cxC{5} corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3;
   array cbetaC{6} _TEMPORARY_;

   * Read the count model estimates;
   retain theta;
   if _n_ = 1 then do;
      call streaminit(5151);
      set countEstEx2NB2(where=(line='CommercialBanking' and _type_='PARM'));
      cbetaC(1) = Intercept;
      do i=1 to dim(cxC);
         cbetaC(i+1) = cxC(i);
      end;
      alpha = _Alpha;
      theta = 1/alpha;

      set countEstEx2Poisson(where=(line='RetailBanking' and _type_='PARM'));
      cbetaR(1) = Intercept;
      do i=1 to dim(cxR);
         cbetaR(i+1) = cxR(i);
      end;
   end;

   * Find the number of observations in the scenario data set;
   nobs = 0;
   do while(last=0);
     set multiConditionScenario end=last;
     nobs = nobs+1;
   end;
   nobstotal=nobs;

   do iline=1 to 2;
      if (iline=1) then line = 'CommercialBanking';
      else line = 'RetailBanking';
      do repid=1 to 10000;
         do nobs=1 to nobstotal;
            set multiConditionScenario point=nobs;
            * Draw from the appropriate count distribution *;
            if (line = 'CommercialBanking') then do;
               xbeta = cbetaC(1);
               do i=1 to dim(cxC);
                  xbeta = xbeta + cxC(i) * cbetaC(i+1);
               end;
               Mu = exp(xbeta);
               p = theta/(Mu+theta);
               numloss = rand('NEGB',p,theta);
            end;
            else if (line = 'RetailBanking') then do;
               xbeta = cbetaR(1);
               do i=1 to dim(cxR);
                  xbeta = xbeta + cxR(i) * cbetaR(i+1);
               end;
               numloss = rand('POISSON', exp(xbeta));
            end;
            output;
         end;
      end;
   end;
run;

/* Estimate the distribution of the aggregate loss for both
   lines of business by using the externally simulated counts
   for the multiple operating environments */
proc hpcdm data=lossCounts2 seed=13579 print=all
           severityest=sevestEx2Best plots=density;
   by line;
   distby repid;
   externalcounts count=numloss id=repid;
   severitymodel logn gamma;
run;