Resources

Logistic and Probit Regression for a Binary Response

/****************************************************************/
/*          S A S   S A M P L E   L I B R A R Y                 */
/*                                                              */
/*    NAME: LOGIT                                               */
/*   TITLE: Logistic and Probit Regression for a Binary Response*/
/* PRODUCT: IML                                                 */
/*  SYSTEM: ALL                                                 */
/*    KEYS: MATRIX  REGR    SUGI6                               */
/*   PROCS: IML                                                 */
/*    DATA:                                                     */
/*                                                              */
/* SUPPORT: RHD                         UPDATE: SEP2013         */
/*     REF:                                                     */
/*    MISC:                                                     */
/*                                                              */
/****************************************************************/


proc iml ;
/* compute density function (PDF) */
start Density(model, z);
   if upcase(model)='LOGIT'  then
      return( pdf("Logistic", z) );
   else /* "PROBIT" */
      return( pdf("Normal", z) );
finish;

/* compute cumulative distribution function (CDF) */
start Distrib(model, z);
   if upcase(model)='LOGIT'  then
      return( cdf("Logistic", z) );
   else /* "PROBIT" */
      return( cdf("Normal", z) );
finish;

/* routine for estimating binary response models             */
/* model is "logit" or "probit"                              */
/* varNames has the names of the regressor variables         */
start BinEst(nEvents, nTrials, data, model, varNames);
   /* set up design matrix */
   n = nrow(data);
   x = j(n,1,1) || data;                       /* add intercept */
   x = x // x;                                    /* regressors */
   y = j(n,1,1) // j(n,1,0);      /* binary response: 1s and 0s */
   wgt = nEvents // (nTrials-nEvents);         /* count weights */

   parms = "Intercept" || rowvec(varNames);
   k = ncol(x);
   b = j(k,1,0);                             /* starting values */
   oldb = b+1;
   results = j(20, 2+k, .);          /* store iteration history */
   do iter=1 to nrow(results) while(max(abs(b-oldb))>1e-8);
      oldb = b;
      z = x*b;
      p = Distrib(model, z);
      loglik = sum( wgt#((y=1)#log(p) + (y=0)#log(1-p)) );
      results[iter, ] = iter || loglik || b`;
      w = wgt / (p#(1-p));
      f = Density(model, z);
      xx = f#x;
      xpxi = inv(xx`*(w#xx));
      b = b + xpxi*(xx`*(w#(y-p)));
   end;
   idx = loc(results^=.);    /* trim results if few iterations */
   results = shape(results[idx],0,2+ncol(parms));
   colnames = {"Iter" "LogLik"} || parms;
   lbl = "Iteration History: " + model + " Model";
   print results[colname=colnames label=lbl];

   p0 = sum((y=1)#wgt) / sum(wgt);           /* average response */
   loglik0 = sum( wgt#((y=1)#log(p0) + (y=0)#log(1-p0)) );
   chisq = 2#(loglik-loglik0);
   df = k-1;
   prob = 1 - cdf("ChiSq", chisq, df);
   stats = chisq || df || prob;
   print stats[colname={'ChiSq' 'DF' 'Prob'}
               label='Likelihood Ratio, Intercept-only Model'];

   stderr = sqrt(vecdiag(xpxi));
   tRatio = b/stderr;
   print (parms`)[label="parms"] b stderr tRatio;
finish;

data={ 7 1.0 0 10, 14 1.0 0 31, 27 1.0 1 56, 51 1.0 3 13,
       7 1.7 0 17, 14 1.7 0 43, 27 1.7 4 44, 51 1.7 0  1,
       7 2.2 0  7, 14 2.2 2 33, 27 2.2 0 21, 51 2.2 0  1,
       7 2.8 0 12, 14 2.8 0 31, 27 2.8 1 22,
       7 4.0 0  9, 14 4.0 0 19, 27 4.0 1 16, 51 4.0 0  1};
x = data[, 1:2];
parms = {"Heat" "Soak"};
nReady = data[, 3];
nTotal = data[, 4];

run BinEst(nReady, nTotal, x, "Logit", parms);     /* run logit model  */

run BinEst(nReady, nTotal, x, "Probit", parms);    /* run probit model */