Categorical Linear Models
/****************************************************************/
/* S A S S A M P L E L I B R A R Y */
/* */
/* NAME: CATLIN */
/* TITLE: Categorical Linear Models */
/* PRODUCT: IML */
/* SYSTEM: ALL */
/* KEYS: MATRIX STATAPP SUGI6 */
/* PROCS: IML */
/* DATA: */
/* */
/* SUPPORT: Rick Wicklin UPDATE: SEP2013 */
/* REF: */
/* */
/****************************************************************/
proc iml ;
/* Subroutine to compute new probability estimates */
/* Last column not needed since sum of each row is 1 */
start prob(x, beta, q);
la = exp(x*shape(beta,0,q));
pi = la / ((1+la[,+])*repeat(1,1,q));
return( colvec(pi) );
finish prob;
/* Categorical Linear Models */
/* by Least Squares and Maximum Likelihood */
/* Input: */
/* n the s by p matrix of response counts */
/* x the s by r design matrix */
start catlin(n, x);
s = nrow(n); /* number of populations */
r = ncol(n); /* number of responses */
q = r-1; /* number of function values */
d = ncol(x); /* number of design parameters */
qd = q*d; /* total number of parameters */
/* initial (emprical) probability estimates */
rown = n[,+]; /* row totals */
pr = n/rown; /* probability estimates */
print pr[label="Initial Probability Estimates"];
/* function of probabilities */
p = colvec(pr[,1:q]); /* cut and shaped to vector */
f = log(p) - log(pr[,r])@repeat(1,q,1);
/* estimate by the GSK method */
/* inverse covariance of f */
si = (diag(p)-p*p`) # (diag(rown)@repeat(1,q,q));
z = x@I(q); /* expanded design matrix */
h = z`*si*z; /* crossproducts matrix */
g = z`*si*f; /* cross with f */
beta = solve(h,g); /* least squares solution */
stderr = sqrt(vecdiag(inv(h))); /* standard errors */
pi = prob(x, beta, q);
est = beta || stderr;
pr = shape(pi, 0, q);
print est[colname={"beta" "stderr"} label="GSK Estimates"], pr;
/* ML solution */
crit = 1;
do it = 1 to 8 while(crit>.0005);/* iterate until converge*/
/* block diagonal weighting */
si = (diag(pi)-pi*pi`) #( diag(rown)@repeat(1,q,q));
g = z`*(rown@repeat(1,q,1)#(p-pi)); /* gradient */
h = z`*si*z; /* hessian */
delta = solve(h,g); /* correction via Newton's method */
beta = beta+delta; /* apply the correction */
pi = prob(x, beta, q); /* compute prob estimates */
crit = max(abs(delta)); /* convergence criterion */
end;
stderr = sqrt(vecdiag(inv(h))); /* standard errors */
est = beta || stderr;
pr = shape(pi, 0, q);
print est[colname={"beta" "stderr"} label="ML Estimates"], pr;
print it[label="Iterations"] crit[label="Criterion"];
finish catlin;
/* frequency counts*/
n= { 58 11 05,
75 19 07,
49 14 10,
58 17 08,
33 18 15,
45 22 10,
15 13 15,
39 22 18,
04 12 17,
05 15 08};
/* design matrix */
x= { 1 1 1 0 0 0,
1 -1 1 0 0 0,
1 1 0 1 0 0,
1 -1 0 1 0 0,
1 1 0 0 1 0,
1 -1 0 0 1 0,
1 1 0 0 0 1,
1 -1 0 0 0 1,
1 1 -1 -1 -1 -1,
1 -1 -1 -1 -1 -1};
run catlin(n, x);