Scenario Analysis with Rich Regression Effects and BY Groups
/*--------------------------------------------------------------
SAS Sample Library
Name: hcdmex03.sas
Description: Example program from SAS/ETS High Performance
Procedures User's Guide, The HPCDM Procedure
Title: Scenario Analysis with Rich Regression Effects and BY Groups
Product: SAS High Performance Econometrics Software
Keys: Compound Distribution Modeling,
Aggregate Loss Modeling
PROC: HPCDM
Notes:
--------------------------------------------------------------*/
%let npolicy=5000;
%macro generateRegvars;
age = MAX(int(rand('NORMAL', 35, 15)),16)/50;
if (rand('UNIFORM') < 0.5) then do;
genderNum = 1; * female;
gender = 'F';
end;
else do;
genderNum = 2; * male;
gender = 'M';
end;
if (rand('UNIFORM') < 0.7) then do;
carTypeNum = 1; * sedan;
carType = 'Sedan';
end;
else do;
carTypeNum = 2; * SUV;
carType = 'SUV';
end;
educationLevel = rand('UNIFORM');
if (educationLevel < 0.5) then do;
educationNum = 1; *high school graduate;
education = 'High School';
end;
else if (educationLevel < 0.85) then do;
educationNum = 2; *college graduate;
education = 'College';
end;
else do;
educationNum = 3; *advanced degree;
education = 'Advanced Degree';
end;
annualmiles = MAX(1000, int(rand('NORMAL', 12000, 5000)))/5000;
carSafety = rand('UNIFORM'); * scaled to be between 0 & 1 *;
income = MAX(15000,int(rand('NORMAL', educationNum*30000, 50000)))/100000;
%mend;
data losses(keep=region policyholderId age gender carType
annualmiles education carSafety
income lossAmount noloss);
call streaminit(12345);
array cx{6} age genderF milesSUV milesSedan educationAdv educationColl;
array cbeta{7} _TEMPORARY_ (1 0.75 -1 -1.2 -0.6 0.5 0.75); * first one is for intercept;
array czx{4} age carTypeSUV educationAdv educationColl;
array czbeta{5} _TEMPORARY_ (-0.75 -1 -1.2 0.6 0.7); * first one is for intercept;
array sx{8} _temporary_;
array sbeta{9} _TEMPORARY_ (5 0.5 1.15 -0.75 -0.3 0.4 0.7 -0.5 -0.3); * first one is for intercept;
length gender $1 carType $8 education $16;
alpha = 0.9; theta = 1/alpha;
do iregion=1 to 2;
if (iregion = 1) then do;
region = 'East';
sigma = 0.5;
end;
else do;
region = 'West';
/* slightly change parameter estimates for this group */
do i=1 to dim(cbeta);
cbeta(i) = cbeta(i) + 0.1;
end;
do i=1 to dim(czbeta);
czbeta(i) = czbeta(i) + 0.1;
end;
do i=1 to dim(sbeta);
sbeta(i) = sbeta(i) + 0.05;
end;
sigma = 0.3;
alpha = 1.6; theta = 1/alpha;
end;
do policyholderId=1 to &npolicy;
* simulate policyholder and vehicle attributes *;
%generateRegvars;
do i=1 to dim(sx);
sx(i) = 0;
end;
if (carTypeNum = 2) then do;
sx(1) = 1;
carTypeSUV = 1;
milesSUV = annualmiles;
milesSedan = 0;
end;
else do;
carTypeSUV = 0;
milesSUV = 0;
milesSedan = annualmiles;
end;
if (genderNum = 1) then do;
sx(2) = 1;
genderF = 1;
end;
else genderF = 0;
sx(3) = carSafety;
sx(4) = income;
if (educationNum = 3 and carTypeNum = 2) then sx(5) = 1;
if (educationNum = 2 and carTypeNum = 2) then sx(6) = 1;
if (educationNum = 3 and carTypeNum = 1) then sx(7) = 1;
if (educationNum = 2 and carTypeNum = 1) then sx(8) = 1;
if (educationNum = 3) then do;
educationAdv = 1;
educationColl = 0;
end;
else do;
educationAdv = 0;
if (educationNum = 2) then educationColl = 1;
else educationColl = 0;
end;
* simulate number of losses incurred by this policyholder *;
czxbeta = czbeta(1);
do i=1 to dim(czx);
czxbeta = czxbeta + czx(i) * czbeta(i+1);
end;
phi = exp(czxbeta)/(1+exp(czxbeta));
if (rand('UNIFORM') < phi) then do;
* zero process is selected with probability phi;
numloss = 0;
end;
else do;
* negbin process is selected with probability (1-phi);
cxbeta = cbeta(1);
do i=1 to dim(cx);
cxbeta = cxbeta + cx(i) * cbeta(i+1);
end;
Mu = exp(cxbeta);
p = theta/(Mu+theta);
numloss = rand('NEGB',p,theta);
end;
* simulate severity of each of the losses *;
if (numloss > 0) then do;
noloss = 0;
Mu = sbeta(1);
do i=1 to dim(sx);
Mu = Mu + sx(i) * sbeta(i+1);
end;
do iloss=1 to numloss;
lossAmount = exp(Mu) * rand('LOGNORMAL')**Sigma;
output;
end;
end;
else do;
noloss = 1;
lossAmount = .;
output;
end;
end;
end;
run;
proc sort data=losses out=lossesByPolicy;
by region policyholderId;
run;
data losscounts(keep=region numloss age gender carType annualmiles education income);
set lossesByPolicy;
by region policyholderId;
retain numloss zeroloss 0;
if (noloss eq 1) then
zeroloss = zeroloss + 1;
else
numloss = numloss + 1;
if (last.policyholderId) then do;
output;
numloss = 0;
end;
run;
*** create scenario for internal count simulation *;
data scenario(keep=region policyholderId age gender
carType annualmiles education carSafety income);
call streaminit(456);
length region $4 gender $1 carType $8 education $16;
do iregion=1 to 2;
if (iregion = 1) then do;
region = 'East';
npolicy = 3;
end;
else do;
region = 'West';
npolicy = 5;
end;
do policyholderId = 1 to npolicy;
%generateRegvars;
output;
end;
end;
run;
proc severity data=losses(where=(not(missing(lossAmount))))
covout outstore=work.sevstore print=all plots=none;
by region;
loss lossAmount;
class carType gender education;
scalemodel carType gender carSafety income education*carType
income*gender carSafety*income;
dist logn burr;
run;
proc countreg data=losscounts covout;
by region;
class gender carType education;
model numloss = age income gender carType*annualmiles education / dist=negbin;
zeromodel numloss ~ age income carType education;
store cstore;
run;
/* Re-fit models after removing insignificant effects. */
proc severity data=losses(where=(not(missing(lossAmount))))
covout outstore=work.sevstore print=all plots=none;
by region;
loss lossAmount;
class carType gender education;
scalemodel carType gender carSafety income education*carType;
dist logn burr;
run;
proc countreg data=losscounts covout;
by region;
class gender carType education;
model numloss = age gender carType*annualmiles education / dist=negbin;
zeromodel numloss ~ age carType education;
store cstore;
run;
proc hpcdm data=scenario nreplicates=10000 seed=123 print=all
severitystore=work.sevstore countstore=work.cstore
nperturb=30;
by region;
severitymodel logn;
outsum out=agglossStats mean stddev skewness kurtosis pctlpts=(90 97.5 99.5);
run;