Using Externally Simulated Count Data
/*--------------------------------------------------------------
SAS Sample Library
Name: hcdmex02.sas
Description: Example program from SAS/ETS High Performance
Procedures User's Guide, The HPCDM Procedure
Title: Using Externally Simulated Count Data
Product: SAS High Performance Econometrics Software
Keys: Compound Distribution Modeling,
Aggregate Loss Modeling
PROC: HPCDM
Notes:
--------------------------------------------------------------*/
/* Simulate count data */
data cntdataex2(keep=line corpKRI1 corpKRI2
cbKRI1 cbKRI2 cbKRI3 rbKRI1 rbKRI2 numloss);
call streaminit(12345);
array cx{7} corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3 rbKRI1 rbKRI2;
array cbetaR{8} _TEMPORARY_ (0.1 1 0 0 0 0 0.5 0.25);
array cbetaC{8} _TEMPORARY_ (0.5 0.75 0.3 0.1 0.25 0.5 0 0);
alpha = 0.3;
theta = 1/alpha;
do obs=1 to 5000;
do i=1 to dim(cx);
cx(i) = rand('NORMAL');
end;
line = 'CommercialBanking';
xbeta = cbetaC(1);
do i=1 to dim(cx);
xbeta = xbeta + cx(i) * cbetaC(i+1);
end;
Mu = exp(xbeta);
p = theta/(Mu+theta);
numloss = rand('NEGB',p,theta);
output;
line = 'RetailBanking';
xbeta = cbetaR(1);
do i=1 to dim(cx);
xbeta = xbeta + cx(i) * cbetaR(i+1);
end;
lambda = exp(xbeta);
numloss = rand('POISSON', lambda);
output;
end;
run;
proc sort data=cntdataex2;
by line;
run;
/* Fit negative binomial (p=2) regression model for each business line */
proc countreg data=cntdataex2 outest=countEstEx2NB2;
by line;
model numloss = corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3
rbKRI1 rbKRI2 / dist=negbin;
run;
/* Fit Poisson regression model for each business line */
proc countreg data=cntdataex2 outest=countEstEx2Poisson;
by line;
model numloss = corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3
rbKRI1 rbKRI2 / dist=poisson;
run;
/* Simulate severity data */
data sevdataEx2(keep=line corpKRI1 corpKRI2 cbKRI2 rbKRI1 rbKRI3 lossValue);
array sx{5} corpKRI1 corpKRI2 cbKRI2 rbKRI1 rbKRI3;
array sbetaC{6} _TEMPORARY_ (5 1 0 0.3 0 0);
array sbetaR{6} _TEMPORARY_ (3.5 0 0.5 0 -0.8 0.6);
if (_n_=1) then call streaminit(67890);
set cntdataex2(keep=line numloss corpKRI1 corpKRI2 cbKRI2 rbKRI1);
sigma = 1;
alpha = 2.5;
if (numloss > 0) then do;
sx(5) = rand('NORMAL'); /* simulate rbKRI3 value */
if (line='CommercialBanking') then do;
/* lognormal */
Mu = sbetaC(1);
do i=1 to dim(sx);
Mu = Mu + sx(i) * sbetaC(i+1);
end;
lossValue = exp(Mu) * rand('LOGNORMAL')**Sigma;
end;
else do;
/* gamma */
Mu = sbetaR(1);
do i=1 to dim(sx);
Mu = Mu + sx(i) * sbetaR(i+1);
end;
lossValue = quantile('Gamma', rand('UNIFORM'), Alpha, exp(Mu));
end;
output;
end;
run;
/* Fit severity models for each business line */
proc severity data=sevdataEx2 outest=sevestEx2 plots=none;
by line;
loss lossValue;
scalemodel corpKRI1 corpKRI2 cbKRI2 rbKRI1 rbKRI3;
dist logn gamma;
run;
/* Generate a scenario data set for a single operating condition */
data singleScenario (keep=corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3
rbKRI1 rbKRI2 rbKRI3);
array x{8} corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3 rbKRI1 rbKRI2 rbKRI3;
call streaminit(5151);
do i=1 to dim(x);
x(i) = rand('NORMAL');
end;
output;
run;
/* Simulate multiple replications of the number of loss events that
you can expect in the scenario being analyzed */
data lossCounts1 (keep=line corpKRI1 corpKRI2 cbKRI2 rbKRI1 rbKRI3 numloss);
array cxR{3} corpKRI1 rbKRI1 rbKRI2;
array cbetaR{4} _TEMPORARY_;
array cxC{5} corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3;
array cbetaC{6} _TEMPORARY_;
retain theta;
if _n_ = 1 then do;
call streaminit(5151);
* read count model estimates *;
set countEstEx2NB2(where=(line='CommercialBanking' and _type_='PARM'));
cbetaC(1) = Intercept;
do i=1 to dim(cxC);
cbetaC(i+1) = cxC(i);
end;
alpha = _Alpha;
theta = 1/alpha;
set countEstEx2Poisson(where=(line='RetailBanking' and _type_='PARM'));
cbetaR(1) = Intercept;
do i=1 to dim(cxR);
cbetaR(i+1) = cxR(i);
end;
end;
set singleScenario;
do iline=1 to 2;
if (iline=1) then line = 'CommercialBanking';
else line = 'RetailBanking';
do repid=1 to 10000;
nnz = 1;
maxtries = 5*nnz;
nc = 0;
ntries = 0;
do while (nc < nnz and ntries < maxtries);
* draw from count distribution *;
if (iline=1) then do;
xbeta = cbetaC(1);
do i=1 to dim(cxC);
xbeta = xbeta + cxC(i) * cbetaC(i+1);
end;
Mu = exp(xbeta);
p = theta/(Mu+theta);
numloss = rand('NEGB',p,theta);
end;
else do;
xbeta = cbetaR(1);
do i=1 to dim(cxR);
xbeta = xbeta + cxR(i) * cbetaR(i+1);
end;
numloss = rand('POISSON', exp(xbeta));
end;
if (numloss > 0) then do;
output;
nc = nc + 1;
end;
ntries = ntries + 1;
end;
end;
end;
run;
/* Keep only the best severity model for each business line
and set coefficients of unused regressors in each model to 0 */
data sevestEx2Best;
set sevestEx2;
if ((line = 'CommercialBanking' and _model_ = 'Logn')) then do;
corpKRI2 = 0; rbKRI1 = 0; rbKRI3 = 0;
output;
end;
else if ((line = 'RetailBanking' and _model_ = 'Gamma')) then do;
corpKRI1 = 0; cbKRI2 = 0;
output;
end;
run;
/* Estimate the distribution of the aggregate loss for both
lines of business by using the externally simulated counts */
proc hpcdm data=lossCounts1 seed=13579 print=all
severityest=sevestEx2Best;
by line;
externalcounts count=numloss;
severitymodel logn gamma;
run;
/* Generate a scenario data set for multiple operating conditions */
data multiConditionScenario (keep=opEnvId corpKRI1 corpKRI2
cbKRI1 cbKRI2 cbKRI3 rbKRI1 rbKRI2 rbKRI3);
array x{8} corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3 rbKRI1 rbKRI2 rbKRI3;
call streaminit(5151);
do opEnvId=1 to 4;
do i=1 to dim(x);
x(i) = rand('NORMAL');
end;
output;
end;
run;
/* Simulate multiple replications of the number of loss events that you can
expect for all operating environments in the scenario being analyzed */
data lossCounts2 (keep=line opEnvId corpKRI1 corpKRI2 cbKRI2
rbKRI1 rbKRI3 repid numloss);
array cxR{3} corpKRI1 rbKRI1 rbKRI2;
array cbetaR{4} _TEMPORARY_;
array cxC{5} corpKRI1 corpKRI2 cbKRI1 cbKRI2 cbKRI3;
array cbetaC{6} _TEMPORARY_;
* Read the count model estimates;
retain theta;
if _n_ = 1 then do;
call streaminit(5151);
set countEstEx2NB2(where=(line='CommercialBanking' and _type_='PARM'));
cbetaC(1) = Intercept;
do i=1 to dim(cxC);
cbetaC(i+1) = cxC(i);
end;
alpha = _Alpha;
theta = 1/alpha;
set countEstEx2Poisson(where=(line='RetailBanking' and _type_='PARM'));
cbetaR(1) = Intercept;
do i=1 to dim(cxR);
cbetaR(i+1) = cxR(i);
end;
end;
* Find the number of observations in the scenario data set;
nobs = 0;
do while(last=0);
set multiConditionScenario end=last;
nobs = nobs+1;
end;
nobstotal=nobs;
do iline=1 to 2;
if (iline=1) then line = 'CommercialBanking';
else line = 'RetailBanking';
do repid=1 to 10000;
nnz = 1;
maxtries = 5*nobstotal*nnz;
nc = 0;
ntries = 0;
do while (nc < nnz and ntries < maxtries);
do nobs=1 to nobstotal;
set multiConditionScenario point=nobs;
* Draw from the appropriate count distribution *;
if (line = 'CommercialBanking') then do;
xbeta = cbetaC(1);
do i=1 to dim(cxC);
xbeta = xbeta + cxC(i) * cbetaC(i+1);
end;
Mu = exp(xbeta);
p = theta/(Mu+theta);
numloss = rand('NEGB',p,theta);
end;
else if (line = 'RetailBanking') then do;
xbeta = cbetaR(1);
do i=1 to dim(cxR);
xbeta = xbeta + cxR(i) * cbetaR(i+1);
end;
numloss = rand('POISSON', exp(xbeta));
end;
if (numloss > 0) then do;
output;
nc = nc + 1;
end;
ntries = ntries + 1;
end;
end;
end;
end;
run;
/* Estimate the distribution of the aggregate loss for both
lines of business by using the externally simulated counts
for the multiple operating environments */
proc hpcdm data=lossCounts2 seed=13579 print=all
severityest=sevestEx2Best plots=density;
by line;
distby repid;
externalcounts count=numloss id=repid;
severitymodel logn gamma;
run;