Categorical Linear Models

/****************************************************************/
/*          S A S   S A M P L E   L I B R A R Y                 */
/*                                                              */
/*    NAME: CATLIN                                              */
/*   TITLE: Categorical Linear Models                           */
/* PRODUCT: IML                                                 */
/*  SYSTEM: ALL                                                 */
/*    KEYS: MATRIX  STATAPP SUGI6                               */
/*   PROCS: IML                                                 */
/*    DATA:                                                     */
/*                                                              */
/* SUPPORT: Rick Wicklin                UPDATE: SEP2013         */
/*     REF:                                                     */
/*                                                              */
/****************************************************************/


proc iml ;
/* Subroutine to compute new probability estimates   */
/* Last column not needed since sum of each row is 1 */
start prob(x, beta, q);
   la = exp(x*shape(beta,0,q));
   pi = la / ((1+la[,+])*repeat(1,1,q));
   return( colvec(pi) );
finish prob;

/* Categorical Linear Models                    */
/* by Least Squares and Maximum Likelihood      */
/*  Input:                                      */
/*     n the s by p matrix of response counts   */
/*     x the s by r design matrix               */
start catlin(n, x);
   s = nrow(n);                     /* number of populations */
   r = ncol(n);                       /* number of responses */
   q = r-1;                     /* number of function values */
   d = ncol(x);               /* number of design parameters */
   qd = q*d;                   /* total number of parameters */

   /* initial (emprical) probability estimates */
   rown = n[,+];                               /* row totals */
   pr = n/rown;                     /* probability estimates */
   print pr[label="Initial Probability Estimates"];

   /* function of probabilities */
   p = colvec(pr[,1:q]);         /* cut and shaped to vector */
   f = log(p) - log(pr[,r])@repeat(1,q,1);

   /* estimate by the GSK method */
   /* inverse covariance of f    */
   si = (diag(p)-p*p`) # (diag(rown)@repeat(1,q,q));
   z = x@I(q);                     /* expanded design matrix */
   h = z`*si*z;                      /* crossproducts matrix */
   g = z`*si*f;                              /* cross with f */
   beta = solve(h,g);              /* least squares solution */
   stderr = sqrt(vecdiag(inv(h)));        /* standard errors */
   pi = prob(x, beta, q);
   est = beta || stderr;
   pr = shape(pi, 0, q);
   print est[colname={"beta" "stderr"} label="GSK Estimates"], pr;

   /* ML solution   */
   crit = 1;
   do it = 1 to 8 while(crit>.0005);/* iterate until converge*/
      /* block diagonal weighting  */
      si = (diag(pi)-pi*pi`) #( diag(rown)@repeat(1,q,q));
      g = z`*(rown@repeat(1,q,1)#(p-pi));        /* gradient */
      h = z`*si*z;                                /* hessian */
      delta = solve(h,g);  /* correction via Newton's method */
      beta = beta+delta;             /* apply the correction */
      pi = prob(x, beta, q);       /* compute prob estimates */
      crit = max(abs(delta));       /* convergence criterion */
   end;
   stderr = sqrt(vecdiag(inv(h)));        /* standard errors */
   est = beta || stderr;
   pr = shape(pi, 0, q);
   print est[colname={"beta" "stderr"} label="ML Estimates"], pr;
   print it[label="Iterations"]  crit[label="Criterion"];
finish catlin;

/* frequency counts*/
n= { 58 11 05,
     75 19 07,
     49 14 10,
     58 17 08,
     33 18 15,
     45 22 10,
     15 13 15,
     39 22 18,
     04 12 17,
     05 15 08};

/* design matrix */
x= { 1  1  1  0  0  0,
     1 -1  1  0  0  0,
     1  1  0  1  0  0,
     1 -1  0  1  0  0,
     1  1  0  0  1  0,
     1 -1  0  0  1  0,
     1  1  0  0  0  1,
     1 -1  0  0  0  1,
     1  1 -1 -1 -1 -1,
     1 -1 -1 -1 -1 -1};

run catlin(n, x);