## Categorical Linear Models

```/****************************************************************/
/*          S A S   S A M P L E   L I B R A R Y                 */
/*                                                              */
/*    NAME: CATLIN                                              */
/*   TITLE: Categorical Linear Models                           */
/* PRODUCT: IML                                                 */
/*  SYSTEM: ALL                                                 */
/*    KEYS: MATRIX  STATAPP SUGI6                               */
/*   PROCS: IML                                                 */
/*    DATA:                                                     */
/*                                                              */
/* SUPPORT: Rick Wicklin                UPDATE: SEP2013         */
/*     REF:                                                     */
/*                                                              */
/****************************************************************/

proc iml ;
/* Subroutine to compute new probability estimates   */
/* Last column not needed since sum of each row is 1 */
start prob(x, beta, q);
la = exp(x*shape(beta,0,q));
pi = la / ((1+la[,+])*repeat(1,1,q));
return( colvec(pi) );
finish prob;

/* Categorical Linear Models                    */
/* by Least Squares and Maximum Likelihood      */
/*  Input:                                      */
/*     n the s by p matrix of response counts   */
/*     x the s by r design matrix               */
start catlin(n, x);
s = nrow(n);                     /* number of populations */
r = ncol(n);                       /* number of responses */
q = r-1;                     /* number of function values */
d = ncol(x);               /* number of design parameters */
qd = q*d;                   /* total number of parameters */

/* initial (emprical) probability estimates */
rown = n[,+];                               /* row totals */
pr = n/rown;                     /* probability estimates */
print pr[label="Initial Probability Estimates"];

/* function of probabilities */
p = colvec(pr[,1:q]);         /* cut and shaped to vector */
f = log(p) - log(pr[,r])@repeat(1,q,1);

/* estimate by the GSK method */
/* inverse covariance of f    */
si = (diag(p)-p*p`) # (diag(rown)@repeat(1,q,q));
z = x@I(q);                     /* expanded design matrix */
h = z`*si*z;                      /* crossproducts matrix */
g = z`*si*f;                              /* cross with f */
beta = solve(h,g);              /* least squares solution */
stderr = sqrt(vecdiag(inv(h)));        /* standard errors */
pi = prob(x, beta, q);
est = beta || stderr;
pr = shape(pi, 0, q);
print est[colname={"beta" "stderr"} label="GSK Estimates"], pr;

/* ML solution   */
crit = 1;
do it = 1 to 8 while(crit>.0005);/* iterate until converge*/
/* block diagonal weighting  */
si = (diag(pi)-pi*pi`) #( diag(rown)@repeat(1,q,q));
g = z`*(rown@repeat(1,q,1)#(p-pi));        /* gradient */
h = z`*si*z;                                /* hessian */
delta = solve(h,g);  /* correction via Newton's method */
beta = beta+delta;             /* apply the correction */
pi = prob(x, beta, q);       /* compute prob estimates */
crit = max(abs(delta));       /* convergence criterion */
end;
stderr = sqrt(vecdiag(inv(h)));        /* standard errors */
est = beta || stderr;
pr = shape(pi, 0, q);
print est[colname={"beta" "stderr"} label="ML Estimates"], pr;
print it[label="Iterations"]  crit[label="Criterion"];
finish catlin;

/* frequency counts*/
n= { 58 11 05,
75 19 07,
49 14 10,
58 17 08,
33 18 15,
45 22 10,
15 13 15,
39 22 18,
04 12 17,
05 15 08};

/* design matrix */
x= { 1  1  1  0  0  0,
1 -1  1  0  0  0,
1  1  0  1  0  0,
1 -1  0  1  0  0,
1  1  0  0  1  0,
1 -1  0  0  1  0,
1  1  0  0  0  1,
1 -1  0  0  0  1,
1  1 -1 -1 -1 -1,
1 -1 -1 -1 -1 -1};

run catlin(n, x);

```