Fitting the Parameters of a Simple Beta Distribution
/****************************************************************/
/* S A S S A M P L E L I B R A R Y */
/* */
/* NAME: betafit */
/* TITLE: Fitting the Parameters of a Simple Beta Distribution*/
/* PRODUCT: STAT */
/* SYSTEM: All */
/* KEYS: nonlinear models, */
/* PROCS: NLIN MEANS (NLMIXED, GLIMMIX) */
/* DATA: */
/* */
/* REF: Charnes, Frome and Yu, J. Amer. Stat. Soc. V71, */
/* p 169 (1976) */
/****************************************************************/
/* Fitting the parameters of a beta distribution using */
/* maximum likelihood */
/* Generate data from the Beta distribution */
data sim;
do i=1 to 100;
x = betaInv(ranuni(1),2,4);
output;
end;
run;
/* Transform to log(x) and log(1-x) */
data one; set sim;
x1 = log(x);
x2 = log(1-x);
run;
proc means data=one noprint;
var x x1 x2;
output out=one sum=x x1 x2 n=n var=v;
run;
data one; set one;
/* Method of moments initial estimates*/
/* of a and b. */
/* mean(x) = a/(a+b), var(x) = ab/( (a+b)**2 (a+b+1) ) */
/* hence (a+b) = mu(1-mu)/sigma**2 - 1 */
/* b = (1-mu)(a+b) */
a_init = (x/n)*(1-x/n)/v-1;
if (a_init < 0) then a_init=1;
b_init = (1-x / n) * a_init;
a_init = A_INIT - B_INIT;
if (a_init < 1) then a_init = 1;
if (b_init < 1) then b_inti = 1;
/* Create two observations, one for log(x) and one for log(1-x) */
type=0; x=x1; output;
type=1; x=x2; output;
keep x n type a_init b_init;
run;
title "Using NLIN to fit the parameters of the beta distribution";
/* ------------------------------------------------------------
*
* Equations are:
*
* grad = 0 = du/dp sigma(-1) r
*
* where u are the two means of log(x) and log(1-x)
* as functions of a and b, p is the parameter in question,
* sigma is the variance covariance matrix of
* (log(x), log(1-x) ) and r is the vector of residuals,
* ( log(x) - predicted mean, log(1-x) - predicted mean .
*
* hessian = H = du/dp sigma(-1) du/dp.
*
* These equations are realized in the NLIN situation by
* factor sigma = L*L` and using
* L(-1) * r
* L(-1) * (du/dp)
* as two independent observations. The effect is to
* reproduce the gradient and hessian equation exactly.
* Note that the data have been concentrated first.
* Sum ( log(x) ) and Sum( log(1-x) ) are sufficient
* statistics for this problems.
* Note that the resulting variance covariance matrix is
* the usual maximum likelihood estimate and that for this
* model the expected and observed information matrices are
* identical. The actual residual sum of squares is zero in
* this implementation, as are the residual degrees of freedom.
* --------------------------------------------------------------- */
proc nlin sigsq=1; /* to generate the ML standard errors */
parms a=4 b=4;
retain z ta tb tab wab waa wbb lab laa lbb ua ub;
/* 2 by 2 Cholesky root of the variance matrix */
if _ITER_ = 0 and _OBS_=1 then do;
a = a_init;
b = b_init;
end;
if _OBS_=1 then do;
/* need the trigamma function, a numerical derivative of */
/* the digamma function will suffice. */
/* Variance matrix */
waa = n*(trigamma(A)-trigamma(a+b));
wab = n*( -trigamma(a+b));
wbb = n*(trigamma(B)-trigamma(a+b));
laa = sqrt(waa); /* Cholesky root */
lab = wab/laa;
lbb = sqrt(wbb-lab*lab);
ua = n*(digamma(a)-digamma(a+b)); /* expect of log(x) */
ub = n*(digamma(b)-digamma(a+b)); /* expect of log(1-x) */
end;
if type=0 then do; /* poor mans Cholesky root application */
da = waa/laa; /* derivatives of residuals */
db = wab/laa;
s = (x-ua)/laa; /* residual: log(x)-expected value of log x */
z = s; /* save for other log(1-x) */
l = a*x-n*(lgamma(a)+lgamma(b)-lgamma(a+b));
end;
else if type=1 then do; /* FOR LOG(1-X) COMPONENT */
da = (wab-lab*waa/laa)/lbb;
db = (wbb-lab*wab/laa)/lbb;
s = ((x-ub)-lab*z/laa)/lbb;
l = b*x;
end;
/* NLIN fit */
MODEL s = 0; /* S already set to be residual */
_LOSS_ = -L; /* Likelihood function for loss */
DER.A = da; /* DER with resp. to a and b */
DER.B = db;
run;
/*-------------------------------------------------------*/
/*---Using PROC NLMIXED to obtain MLE of beta dist. ---*/
/*---The log-likelihood function is computed directly ---*/
/*---with SAS programming statements. ---*/
proc nlmixed data=sim;
parms a=1 b=1;
bounds a > 0, b > 0;
ll = lgamma(a+b) - lgamma(a) - lgamma(b) +
(a-1)*log(x) + (b-1)*log(1-x);
model x ~ general(ll);
ods select ParameterEstimates;
run;
/*-------------------------------------------------------*/
/*-------------------------------------------------------*/
/*---Using PROC GLIMMIX to obtain MLE of beta dist. ---*/
/*---The parameterization of the beta distribution in ---*/
/*---the GLIMMIX procedure relates to the parameteri- ---*/
/*---zation used in the previous code as follows: ---*/
/*--- ---*/
/*---log(mu/(1-mu)) = Intercept = log(a/b) ---*/
/*--- Scale = a + b ---*/
title 'Using PROC GLIMMIX to fit parameters of a beta distribution';
proc glimmix data=sim;
model x = / dist=beta s;
ods select ModelInfo ParameterEstimates;
run;
/*-------------------------------------------------------*/