Getting Started with PROC HPCDM
/*--------------------------------------------------------------
SAS Sample Library
Name: hcdmgs.sas
Description: Example program from SAS/ETS High Performance
Procedures User's Guide, The HPCDM Procedure
Title: Getting Started with PROC HPCDM
Product: SAS High Performance Econometrics Software
Keys: Compound Distribution Modeling,
Aggregate Loss Modeling
PROC: HPCDM
Notes:
--------------------------------------------------------------*/
/* Simulate data for an intercept-only Poisson count model */
data claimcount(keep=numLosses);
call streaminit(12345);
label numLosses='Number of Loss Events in a Year';
Lambda = 2;
do n = 1 to 500;
numLosses = rand('POISSON',Lambda);
output;
end;
run;
/* Simulate data for gamma severity model */
data claimsev(keep=lossValue);
call streaminit(67890);
label y='Severity of a Loss Event';
Theta = 1000;
Alpha = 2;
do n = 1 to 500;
lossValue = quantile('Gamma', rand('UNIFORM'), Alpha, Theta);
output;
end;
run;
/* Fit an intercept-only Poisson count model and
write estimates to an item store */
proc countreg data=claimcount;
model numLosses= / dist=poisson;
store countStorePoisson;
run;
/* Fit severity models and write estimates to a data set */
proc severity data=claimsev criterion=aicc outest=sevest covout plots=none;
loss lossValue;
dist _predefined_;
run;
/* Simulate and estimate Poisson-gamma compound distribution model */
proc hpcdm countstore=countStorePoisson severityest=sevest
seed=13579 nreplicates=10000 plots=(edf(alpha=0.05) density)
print=(summarystatistics percentiles);
severitymodel gamma;
output out=aggregateLossSample samplevar=aggloss;
outsum out=aggregateLossSummary mean stddev skewness kurtosis
p01 p05 p95 p995=var pctlpts=90 97.5;
run;
/* Conduct parameter perturbation analysis of
the Poisson-gamma compound distribution model */
proc hpcdm countstore=countStorePoisson severityest=sevest
seed=13579 nreplicates=10000 nperturbedsamples=30
print(only)=(perturbsummary) plots=none;
severitymodel gamma;
output out=aggregateLossSample samplevar=aggloss;
outsum out=aggregateLossSummary mean stddev skewness kurtosis
p01 p05 p95 p995=var pctlpts=90 97.5;
run;
/* Simulate data for losses incurred by several policyholders
in some period of time */
data losses(keep=policyholderId age gender carType annualMiles
education carSafety income noloss lossamount);
call streaminit(12345);
array cx{5} age gender carType annualMiles education;
array cbeta{6} _TEMPORARY_ (1 -0.75 1 0.6 -1 -0.25);
array sx{3} carType carSafety income;
array sbeta{4} _TEMPORARY_ (3.5 1.5 -0.8 0.6);
alpha = 1/3; theta = 1/alpha;
Sigma = 1;
do policyholderId=1 to 5000;
/* simulate policyholder and vehicle attributes */
age = MAX(int(rand('NORMAL', 35, 15)),16)/50;
if (rand('UNIFORM') < 0.5) then gender = 1; * female;
else gender = 2; * male;
if (rand('UNIFORM') < 0.7) then carType = 1; * sedan;
else carType = 2; * SUV;
annualMiles = MAX(1000, int(rand('NORMAL', 12000, 5000)))/5000;
educationLevel = rand('UNIFORM');
if (educationLevel < 0.5) then education = 1; *high school graduate;
else if (educationLevel < 0.85) then education = 2; *college graduate;
else education = 3; *advanced degree;
carSafety = rand('UNIFORM'); /* scaled to be between 0 & 1 */
income = MAX(15000,int(rand('NORMAL', education*30000, 50000)))/100000;
/* simulate number of losses incurred by this policyholder */
cxbeta = cbeta(1);
do i=1 to dim(cx);
cxbeta = cxbeta + cx(i) * cbeta(i+1);
end;
Mu = exp(cxbeta);
p = theta/(Mu+theta);
numloss = rand('NEGB',p,theta);
/* simulate severity of each of the losses */
if (numloss > 0) then do;
noloss = 0;
do iloss=1 to numloss;
Mu = sbeta(1);
do i=1 to dim(sx);
Mu = Mu + sx(i) * sbeta(i+1);
end;
lossamount = exp(Mu) * rand('LOGNORMAL')**Sigma;
output;
end;
end;
else do;
noloss = 1;
lossamount = .;
output;
end;
end;
run;
/* Aggregate number of losses for each policyholder */
data losscounts(keep=age gender carType annualMiles education numloss);
set losses;
by policyholderId;
retain numloss 0;
if (noloss ne 1) then
numloss = numloss + 1;
if (last.policyholderId) then do;
output;
numloss = 0;
end;
run;
/* Fit negative binomial frequency model for the number of losses */
proc countreg data=losscounts;
model numloss = age gender carType annualMiles education / dist=negbin;
store work.countregmodel;
run;
/* Examine the parameter estimates for the model in the item store */
proc countreg restore=work.countregmodel;
show parameters;
run;
/* Fit severity models for the magnitude of losses */
proc severity data=losses plots=none outest=work.sevregest print=all;
loss lossamount;
scalemodel carType carSafety income;
dist _predef_;
nloptions maxiter=100;
run;
/* Generate the scenario data set for single policyholder */
data singlePolicy(keep=age gender carType annualMiles
education carSafety income);
call streaminit(67897);
age = MAX(int(rand('NORMAL', 35, 15)),16)/50;
if (rand('UNIFORM') < 0.5) then gender = 1; * female;
else gender = 2; * male;
if (rand('UNIFORM') < 0.7) then carType = 1; * sedan;
else carType = 2; * SUV;
annualMiles = MAX(1000, int(rand('NORMAL', 12000, 5000)))/5000;
educationLevel = rand('UNIFORM');
if (educationLevel < 0.5) then education = 1; *high school graduate;
else if (educationLevel < 0.85) then education = 2; *college graduate;
else education = 3; *advanced degree;
carSafety = rand('UNIFORM'); /* scaled to be between 0 & 1 */
income = MAX(15000,int(rand('NORMAL', education*30000, 50000)))/100000;
output;
run;
/* Simulate the aggregate loss distribution for the scenario
with single policyholder */
proc hpcdm data=singlePolicy nreplicates=10000 seed=13579 print=all
countstore=work.countregmodel severityest=work.sevregest;
severitymodel logn gpd;
outsum out=onepolicysum mean stddev skew kurtosis median
pctlpts=97.5 to 99.5 by 1;
run;
/* Generate the scenario data set for multiple policyholders */
data groupOfPolicies(keep=policyholderId age gender carType annualMiles
education carSafety income);
call streaminit(67897);
do policyholderId=1 to 5;
age = MAX(int(rand('NORMAL', 35, 15)),16)/50;
if (rand('UNIFORM') < 0.5) then gender = 1; * female;
else gender = 2; * male;
if (rand('UNIFORM') < 0.7) then carType = 1; * sedan;
else carType = 2; * SUV;
annualMiles = MAX(1000, int(rand('NORMAL', 12000, 5000)))/5000;
educationLevel = rand('UNIFORM');
if (educationLevel < 0.5) then education = 1; *high school graduate;
else if (educationLevel < 0.85) then education = 2; *college graduate;
else education = 3; *advanced degree;
carSafety = rand('UNIFORM'); /* scaled to be between 0 & 1 */
income = MAX(15000,int(rand('NORMAL', education*30000, 50000)))/100000;
output;
end;
run;
/* Simulate the aggregate loss distribution for the scenario
with multiple policyholders */
proc hpcdm data=groupOfPolicies nreplicates=10000 seed=13579 print=all
countstore=work.countregmodel severityest=work.sevregest
plots=(conditionaldensity(rightq=0.95)) nperturbedSamples=50;
severitymodel logn gpd;
outsum out=multipolicysum mean stddev skew kurtosis median
pctlpts=97.5 to 99.5 by 1;
run;