SEVERITY Utility Functions
/*--------------------------------------------------------------
SAS Sample Library
Name: svrtutil.sas
Description: Program defining utility functions for use with
SEVERITY.
Title: SEVERITY Utility Functions
Product: SAS/ETS Software
Keys: fitting continuous distributions
PROC: SEVERITY
Notes: If you run this sample program without any modification, then
the Sasuser.Svrtdist library contains functions and subroutines
that are identical to those in the Sashelp.Svrtdist library.
--------------------------------------------------------------*/
proc fcmp outlib=sasuser.svrtdist.utility;
/*======================================================================
*
* Arguments:
* dim - Dimension of the input array 'x'.
* x - Input array that contains the unique values in the sample.
* nx - Input array that contains the number of times each value in
* array 'x' appears in the sample.
* nRaw - Number of raw moments requested. Upon return, 'raw' would
* contain first nRaw moments.
* raw - An array that would contain requested raw moments upon
* return.
* You need not allocate any space prior to this call.
*======================================================================*/
subroutine SVRTUTIL_RAWMOMENTS(dim, x[*], nx[*], nRaw, raw[*]);
outargs raw;
if (dim < 1 or nRaw < 1) then return;
do j=1 to nRaw;
raw[j] = 0;
end;
n = 0;
do i=1 to dim;
if (not(missing(x[i]) or nx[i] <= 0)) then do;
t1 = x[i];
do j=1 to nRaw;
raw[j] = raw[j] + nx[i] * t1;
t1 = t1 * x[i];
end;
n = n + nx[i];
end;
end;
if (n <= 0) then do;
do j=1 to nRaw;
raw[j] = .;
end;
end;
else do;
do j=1 to nRaw;
raw[j] = raw[j] / n;
end;
end;
endsub;
/*======================================================================
* Computes and returns the requested percentile of the given sample
* given an estimate of its empirical distribution function (EDF).
* This is an empirical estimate of the percentile.
*
* Arguments:
* p - Requested percentile expressed as a fraction in the range
* [0,1]. Function returns 100*p th empirical percentile.
* dim - Dimension of the input array 'x'.
* x - Input array that contains the values in the sample.
* F - Input array that contains an estimate of the EDF. F[i] is
* expected to contain an empirical estimate of Pr(X <= x[i]).
* Ftype - Type of estimator that was used to compute F
* If 1, standard estimator
* If 2, a product-limit estimator (Kaplan-Meier)
* If 3, Turnbull's estimator
* Types 1 & 2 implies individual data, whereas Type 3 implies
* grouped (interval-censored) data.
* Further, type 1 implies no censoring or truncation
* was used and type 2 implies one form of censoring
* (left or right, but not both) was used with optional
* truncation.
* For these two types, x and F are provided at individual data
* points such that EDF is constant at F[i] for the semi-closed
* interval [x[i],x[i+1]). It can be assumed that x contains
* distinct values.
* This function treats types 1 and 2 identically.
* For grouped data, x and F are provided for disjoint intervals
* such that F is constant between intervals and the increase
* in F from the left endpoint of the interval to the right is
* arbitrary. Note that left- and right-endpoints of some
* intervals might be identical (i.e. a point-interval) if a mix
* of individual and grouped data was used to compute the EDF.
* This function assumes a linear increase in F from left to
* right endpoint.
*======================================================================*/
function SVRTUTIL_PERCENTILE(p, dim, x[*], F[*], Ftype);
eps = constant('MACEPS');
if (dim < 1 or MISSING(p) or p < 0 or p > 1+eps) then return (.);
if (Ftype=1) then do;
/* ungrouped data (no censoring/truncation) */
dimp1 = dim+1;
if (p >= 1/dimp1-eps) then do;
if (p <= dim/dimp1+eps) then do;
/* return smoothed estimate as per "Loss Models" by
Klugman et. al. (1998) pg. 35 */
h = dimp1*p;
g = floor(h);
h = h - g;
return (x[g]*(1-h)+x[g+1]*h);
end;
else do;
return (x[dim]);
end;
end;
else do;
return (max(0,(x[1]-eps)/2)); /* assume F is 0 in [0,x[1]) */
end;
end;
else do;
if (Ftype=2) then do;
/* ungrouped data (truncation and one form of censoring).
Kaplan-Meier estimate */
if (p <= F[1]+eps) then do;
if (p < F[1]-eps) then /* p < F[1] */
return (max(0,(x[1]-eps)/2)); /* F is 0 in [0,x[1]) */
else do; /* p = F[1] */
if (dim > 1) then
return ((x[1]+x[2])/2);
else
return (x[1]);
end;
end;
if (p > F[dim]+eps) then
return (x[dim]); /* assume F is flat after last point */
do i=2 to dim;
if (p < F[i]-eps) then do; /* F[i-1] < p < F[i] */
return (x[i]); /* p is along the step so return x[i] */
end;
else do;
if (p <= F[i]+eps) then do; /* p = F[i] */
if (i < dim) then do;
return ((x[i]+x[i+1])/2);
end;
else do;
return (x[i]);
end;
end;
end;
end;
end;
else do;
/* grouped data (Turnbull's estimator) */
if (p <= F[1]+eps) then do;
if (p < F[1]-eps) then /* p < F[1] */
return (max(0,(x[1]-eps)/2)); /* F is 0 in [0,x[1]) */
else /* p = F[1] */
return (x[1]);
end;
if (p > F[dim]+eps) then
return (x[dim]); /* assume F is flat after last point */
do i=1 to dim-1 by 2;
if (p < F[i+1]-eps) then do;
if (F[i+1] > F[i]+eps) then do;
return (x[i]+(p-F[i])*(x[i+1]-x[i])/(F[i+1]-F[i]));
end;
else do;
return ((x[i]+x[i+1])/2);
end;
end;
else do;
if (p <= F[i+1]+eps) then do;
if (i < dim-1) then do;
return ((x[i+1]+x[i+2])/2);
end;
else do;
return (x[i+1]);
end;
end;
end;
end;
end;
end;
endsub;
/* Utility subroutine used by SVRTUTIL_SORT.
* Given two adjacent partitions, with entries numbered a..(b-1) and
* b..(c-1), it swaps the smaller partition with the outer entries of
* the larger. a, b, c should be 0-based indices. */
subroutine svrtutil_zs_vecswp(x[*], a, b, c);
outargs x;
__a = a; __b = b; __c = c;
__n = MIN((__b - __a), (__c - __b));
__c = __c - __n;
do while(__n > 0);
__i = __a+1; __j = __c+1;
__t = x[__i]; x[__i] = x[__j]; x[__j] = __t; /* zs_swp */
__n = __n - 1;
__a = __i;
__c = __j;
end;
endsub;
/*======================================================================
* Sorts the given input sample of numeric values in ascending or
* descending order.
*
* Arguments:
* dim - Dimension of the input array 'x'. This array will be
* sorted in-place.
* x - Input array that contains the values in the sample.
* isDescending - Desired order. If 0, then the array is sorted in
* ascending order. If non-zero, then the array is sorted
* in descending order.
*======================================================================*/
subroutine SVRTUTIL_SORT(dim, x[*], isDescending);
outargs x;
array _st_base[64] /nosymbols;
array _st_num[64] /nosymbols;
array _st_mth[64] /nosymbols;
array _med[13] /nosymbols;
if (isDescending) then do;
/* change signs so ascending sort code can be used */
do _n=1 to dim;
x[_n] = -x[_n];
end;
end;
_n = dim;
_a = 0;
_st_n = 0;
_mth = 1; /* 1: sort precheck, 2: swap-limited insertion sort */
do while(1);
bigloop_begin:;
/* process section [_a, _a+_n) */
/* insertion sort on small (or non-random) arrays */
if (_n < 8 or _mth ne 0) then do;
if (_n < 8) then do;
_s = 8*8;
end;
else do;
if (_mth = 1) then
_s = 1;
else
_s = _n;
end;
do _m = _a+1 to _a+_n-1;
do _l = _m to _a+1 by -1;
/* zs_cmp */
if (not(x[_l] > x[_l+1])) then do; /* equal or smaller */
go to loopend1;
end;
else do; /* greater */
_j = _l+1;
__t = x[_l]; x[_l] = x[_j]; x[_j] = __t; /* zs_swp */
_s = _s - 1;
if (_s = 0) then go to _punt_isort;
end;
end;
loopend1:;
end;
if (_st_n = 0) then
go to bigloop_done;
_st_n = _st_n - 1;
_a = _st_base[_st_n+1];
_n = _st_num[_st_n+1];
_mth = _st_mth[_st_n+1];
go to bigloop_begin;
end;
_punt_isort:;
/* choose partition value */
_l = _a; _m = _a + int(_n/2); _h = _a + _n-1;
_med[1] = _l; _med[2] = _m; _med[3] = _h;
_v = 2;
if (_n > 40) then do;
/* pseudo-median of 9 */
_s = int(_n/8);
_med[1] = _l; _med[2] = _l+_s; _med[3] = _l+2*_s;
_med[4] = _m - _s; _med[5] = _m; _med[6] = _m + _s;
_med[7] = _h - 2*_s; _med[8] = _h - _s; _med[9] = _h;
_v = 8;
end;
do _s = 0 to _v-1 by 3;
_l = _med[_s+1]; _m = _med[_s+2]; _h = _med[_s+3];
_v = _v + 1;
/* set _med[_v+1] to the index of the median of values at
indices _l, _m, _h */
/* zs_cmp */
if (x[_l+1] = x[_m+1]) then do;
_med[_v+1] = _m; go to loopend2;
end;
else do;
if (x[_l+1] > x[_m+1]) then do;
__t = _l; _l = _m; _m = __t;
end;
/* zs_cmp */
if (not(x[_m+1] > x[_h+1])) then do;
/* equal or smaller */
_med[_v+1] = _m; go to loopend2;
end;
else do;
/* zs_cmp */
if (not(x[_l+1] > x[_h+1])) then do;
/* equal or smaller */
_med[_v+1] = _h; go to loopend2;
end;
else do;
_med[_v+1] = _l;
end;
end;
end;
end;
loopend2:;
_m = _med[_v+1];
_orig_a = _a; _v = _a;
_i = _v+1; _j = _m+1;
__t = x[_i]; x[_i] = x[_j]; x[_j] = __t; /* zs_swp */
_c = _a + _n-1; _d = _c;
_a = _a + 1; _b = _a; /* skip over partition value */
_mth = 2;
do while(1);
do while (_b <= _c);
/* zs_cmp */
if (x[_b+1] > x[_v+1]) then do;
go to zs_b_high;
end;
else do;
if (x[_b+1] = x[_v+1]) then do;
/* equal */
_i = _a+1; _j = _b+1;
__t = x[_i]; x[_i] = x[_j]; x[_j] = __t; /* zs_swp */
_a = _a + 1;
end;
/* zs_b_low: */
_b = _b + 1;
end;
end;
go to loopend3;
zs_b_high:;
do while (_b <= _c);
/* zs_cmp */
if (x[_c+1] < x[_v+1]) then do;
go to zs_c_low;
end;
else do;
if (x[_c+1] = x[_v+1]) then do;
/* equal */
_i = _c+1; _j = _d+1;
__t = x[_i]; x[_i] = x[_j]; x[_j] = __t; /* zs_swp */
_d = _d - 1;
end;
/* zs_c_high: */
_c = _c - 1;
end;
end;
go to loopend3;
zs_c_low:;
_i = _b+1; _j = _c+1;
__t = x[_i]; x[_i] = x[_j]; x[_j] = __t; /* zs_swp */
_b = _b + 1;
_c = _c - 1;
_mth = 0;
end;
loopend3:;
/* swap equal items back to middle */
call svrtutil_zs_vecswp(x, _orig_a, _a, _b);
call svrtutil_zs_vecswp(x, _c+1, _d+1, _orig_a + _n);
/* push larger partition onto stack */
_d = _d - _c; _c = _orig_a+_n-_d;
_b = _b - _a; _a = _orig_a; _n = _b;
if (_d < _b) then do;
_a = _c;
_n = _d;
_c = _orig_a;
_d = _b;
end;
_st_base[_st_n+1] = _c;
_st_num[_st_n+1] = _d;
_st_mth[_st_n+1] = _mth;
_st_n = _st_n + 1;
end;
bigloop_done:;
if (isDescending) then do;
/* change signs back to original signs */
do _n=1 to dim;
x[_n] = -x[_n];
end;
end;
endsub;
/* Utility function used by SVRTUTIL_HILLCUTOFF function, to implement
* one step of the Hill's algorithm. */
function svrtutil_hillutil_minMSEindex(n,x[*],b,m,samp[*],mse[*],Qopt);
outargs samp, mse, Qopt;
do k=1 to m-1;
mse[k] = 0;
end;
do ib=1 to b;
/* form a bootstrap sample (all elements of x are assumed to
non-missing and positive) */
do is = 1 to m;
ix = floor(rand('uniform')*n)+1;
samp[is] = x[ix];
end;
/* sort the sample */
call svrtutil_sort(m, samp, 0);
/* compute mse for each k */
do k=1 to m-1;
cvGamma = 0; /* control variate for gamma estimate */
gamma = 0; /* Hill's tail index estimate for given k */
_t1 = log(samp[m-k]);
do i=1 to k;
_t2 = log(samp[m-i+1]) - _t1;
cvGamma = cvGamma + _t2**2;
gamma = gamma + _t2;
end;
cvGamma = cvGamma/k;
gamma = gamma/k;
_mse = (cvGamma - 2*gamma**2)**2;
mse[k] = mse[k] + _mse;
end;
end;
/* find k that minimizes MSE averaged over all bootstrap samples */
_kopt = 1;
Qopt = mse[1]/b;
do k=2 to m-1;
_Q = mse[k]/b;
if(_Q < Qopt) then do;
_kopt = k;
Qopt = _Q;
end;
end;
return(_kopt);
endsub;
/*======================================================================
* Returns the tail cutoff point using the Hill algorithm described
* in the following paper:
* Danielsson, J., L. De Haan, L. Peng, and C. G. de Vries. 2001.
* "Using a Bootstrap Method to Choose the Sample Fraction in Tail Index
* Estimation." Journal of Multivariate Analysis 76:226-248.
*
* Arguments:
* n - Number of severity values in the array x. Must be > 5.
* x - Array of severity values.
* b - Number of bootstrap samples taken to compute the MSE.
* If < 10, default value of 50 is used.
* s - Maximum number of values to try for searching optimal sample
* size (m1) to be used for step 1 of the algorithm. If < 2,
* default of 10 will be used. Equally spaced values in the
* range [n**0.75,n) are tried.
* status - Upon return, this argument contains 0 or 1; 0 indicates
* that the algorithm computed the cutoff point successfully.
* 1 indicates that the algorithm could not compute the cutoff
* point successfully, in which case, 5th largest value of the
* input sample is returned as the approximate cutoff point.
*======================================================================*/
function SVRTUTIL_HILLCUTOFF(n, x[*], b, s, status);
outargs status;
status = .;
_n = int(n);
if (_n < 6) then
/* argument error */
return (.A);
/* form an internal copy of input array that contains only the
non-missing, positive values */
array _x[1] /nosymbols;
call dynamic_array(_x, _n);
nvalid = 0;
do ix=1 to _n;
if (not(missing(x[ix])) and x[ix] > 0) then do;
nvalid = nvalid + 1;
_x[nvalid] = x[ix];
end;
end;
if (nvalid < m1) then
/* insufficient number of valid values. cannot proceed. */
return (.I);
/*------- validate & set b value --------*/
_b = int(b);
if (_b < 10) then
_b = 50; /* default number of bootstrap samples */
/* following m1low setting ensures that m2 >= sqrt(nvalid);
alternate is to use int(sqrt(nvalid))+1, but that
causes m2 to start from close to 1. */
m1low = MAX(2, floor(nvalid**0.75));
m1high = nvalid - 1;
if (s < 2) then
m1steps = 10;
else
m1steps = s;
m1incr = int((m1high-m1low)/m1steps)-1;
minQratio = .;
kopt = .;
array samp[1] /nosymbols;
array mse[1] /nosymbols;
/*---- iterate over m1 (sample size for the first step) ----*/
do m1 = m1low to m1high by m1incr;
/*------- Step 1 (samples of size m1) --------*/
call dynamic_array(samp, m1);
call dynamic_array(mse, m1);
k1 = svrtutil_hillutil_minMSEindex(nvalid, _x, _b, m1, samp,
mse, Q1);
if (k1 > 1) then do;
/*------- Step 2 (samples of size m2) --------*/
m2 = int(m1*m1/nvalid);
k2 = svrtutil_hillutil_minMSEindex(nvalid, _x, _b, m2, samp,
mse, Q2);
if (Q2 = 0) then
Qratio = .;
else
Qratio = Q1**2/Q2;
/*------- Compute cutoff index --------*/
_lk1_by_lm1 = log(k1)/log(m1);
k = (k1**2/k2) * (2/_lk1_by_lm1 - 1)**(-2 + 2*_lk1_by_lm1);
if (not(missing(Qratio))) then do;
/*----- Check if Qratio is minimal for current m1 -----*/
if (missing(minQratio) or Qratio < minQratio) then do;
kopt = k;
minQratio = Qratio;
end;
end;
end;
end; /*----- end loop over m1 -----*/
/*------- return element at rank kopt+1 as cutoff point -------*/
call svrtutil_sort(nvalid, _x, 0);
if (not(missing(kopt)) and kopt < nvalid) then do;
status = 0;
return(_x[int(kopt+1)]);
end;
else do;
/* the algorithm failed to compute valid cutoff index;
return fifth largest value in the sample. */
put 'NOTE: SVRTUTIL_HILLCUTOFF failed to compute a valid cutoff point.' @@;
put 'Returning the 5th largest value in the sample as the cutoff point.';
status = 1;
return (_x[nvalid-4]);
/* alternate is to return 95th percentile, as follows:
array _xuniq[1] /nosymbols;
array _F[1] /nosymbols;
call dynamic_array(_xuniq, nvalid);
call dynamic_array(_F, nvalid);
j = 0;
eps = constant('MACEPS');
do i=1 to nvalid;
k = i+1;
do while(k <= nvalid);
if (abs(_x[i]-_x[k]) <= eps) then
k = k+1;
else
go to _eqloopend;
end;
_eqloopend:;
j = j+1;
_xuniq[j] = _x[i];
_F[j] = (k-1)/nvalid;
end;
return(SVRTUTIL_PERCENTILE(0.95, j, _xuniq, _F));
*/
end;
endsub;
function SVRTUTIL_EDF(y, dim, x[*], F[*], Ftype);
if (missing(y)) then
return(.);
eps = constant('MACEPS');
if (Ftype = 1 or Ftype = 2) then do;
/* individual data */
if (y <= x[1]+eps) then /* y <= x[1] */
if (y < x[1]-eps) then /* y < x[1] */
return (0); /* assume F is 0 in [0,x[1]) */
else /* y = x[1] */
return (F[1]);
if (y > x[dim]+eps) then /* y > x[dim] */
return (F[dim]); /* assume that F is flat after last point */
edf = .;
do i = 2 to dim;
if (y < x[i]-eps) then do; /* x[i-1] < y < x[i] */
edf = F[i-1]; /* F is F[i-1] in [x[i-1],x[i]) */
go to doneEDF;
end;
else if (y <= x[i]+eps) then do; /* y = x[i] */
edf = F[i]; /* F jumps to F[i] at x[i] */
go to doneEDF;
end;
/* continue if y > x[i] */
end;
end;
else do;
/* grouped data */
if (y <= x[1]+eps) then do;
/* y <= x[1] */
if (y < x[1]-eps) then /* y < x[1] */
return (0); /* assume F is 0 in [0,x[1]) */
else do;
/* y = x[1] */
if (1 < dim) then do;
if (x[2] <= x[1]+eps) then
edf = F[2];
else
edf = F[1];
end;
else
edf = F[1];
go to doneEDF;
end;
end;
if (y > x[dim]+eps) then /* y > x[dim] */
return (F[dim]); /* assume that F is flat after last point */
edf = .;
do i = 2 to dim;
/* y is already established to be greater than x[i-1] */
if (y < x[i]-eps) then do; /* x[i-1] < y < x[i] */
edf = F[i-1] + (y-x[i-1])*(F[i]-F[i-1])/(x[i]-x[i-1]);
go to doneEDF;
end;
else if (y <= x[i]+eps) then do; /* y = x[i] */
if (i < dim) then do;
if (x[i+1] <= x[i]+eps) then
edf = F[i+1];
else
edf = F[i];
end;
else
edf = F[i];
go to doneEDF;
end;
/* continue if y > x[i] */
end;
end;
doneEDF:
return(edf);
endsub;
function SVRTUTIL_EMPLIMMOMENT(k, u, dim, x[*], F[*], Ftype);
if (missing(k) or missing(u)) then
return (.);
if (k < 1) then
return (.);
eps = constant('MACEPS');
if (u < x[1]-eps) then do;
elm = u**k;
go to doneELMOM;
end;
kp1 = k+1;
kokp1 = k/(k+1);
elm = x[1]**k;
if (Ftype = 1 or Ftype = 2) then do;
/* individual data */
do i = 1 to dim-1;
if (u <= x[i]+eps) then do;
go to doneELMOM;
end;
if (u < x[i+1]-eps) then do;
elm = elm + (1-F[i])*(u**k - x[i]**k);
go to doneELMOM;
end;
/* u >= x[i+1] */
elm = elm + (1-F[i])*(x[i+1]**k - x[i]**k);
end;
end;
else do;
/* grouped data */
do i = 1 to dim-1;
if (u <= x[i]+eps) then do;
go to doneELMOM;
end;
if (x[i+1] > x[i]+eps) then do;
si = (F[i+1]-F[i])/(x[i+1]-x[i]);
if (si > eps) then do;
if (u < x[i+1]-eps) then do;
elm = elm + (1-F[i]+si*x[i])*(u**k - x[i]**k)
- si*kokp1*(u**kp1 - x[i]**kp1);
go to doneELMOM;
end;
/* u >= x[i+1] */
elm = elm + (1-F[i]+si*x[i])*(x[i+1]**k - x[i]**k)
- si*kokp1*(x[i+1]**kp1 - x[i]**kp1);
end;
else do;
/* si=0 */
if (u < x[i+1]-eps) then do;
elm = elm + (1-F[i])*(u**k - x[i]**k);
go to doneELMOM;
end;
/* u >= x[i+1] */
elm = elm + (1-F[i])*(x[i+1]**k - x[i]**k);
end;
end;
end;
end;
if (u > x[dim]+eps) then do;
elm = elm + (1-F[dim])*(u**k - x[dim]**k);
end;
doneELMOM:
return(elm);
endsub;
quit;