Logistic and Probit Regression for a Binary Response
/****************************************************************/
/* S A S S A M P L E L I B R A R Y */
/* */
/* NAME: LOGIT */
/* TITLE: Logistic and Probit Regression for a Binary Response*/
/* PRODUCT: IML */
/* SYSTEM: ALL */
/* KEYS: MATRIX REGR SUGI6 */
/* PROCS: IML */
/* DATA: */
/* */
/* SUPPORT: RHD UPDATE: SEP2013 */
/* REF: */
/* MISC: */
/* */
/****************************************************************/
proc iml ;
/* compute density function (PDF) */
start Density(model, z);
if upcase(model)='LOGIT' then
return( pdf("Logistic", z) );
else /* "PROBIT" */
return( pdf("Normal", z) );
finish;
/* compute cumulative distribution function (CDF) */
start Distrib(model, z);
if upcase(model)='LOGIT' then
return( cdf("Logistic", z) );
else /* "PROBIT" */
return( cdf("Normal", z) );
finish;
/* routine for estimating binary response models */
/* model is "logit" or "probit" */
/* varNames has the names of the regressor variables */
start BinEst(nEvents, nTrials, data, model, varNames);
/* set up design matrix */
n = nrow(data);
x = j(n,1,1) || data; /* add intercept */
x = x // x; /* regressors */
y = j(n,1,1) // j(n,1,0); /* binary response: 1s and 0s */
wgt = nEvents // (nTrials-nEvents); /* count weights */
parms = "Intercept" || rowvec(varNames);
k = ncol(x);
b = j(k,1,0); /* starting values */
oldb = b+1;
results = j(20, 2+k, .); /* store iteration history */
do iter=1 to nrow(results) while(max(abs(b-oldb))>1e-8);
oldb = b;
z = x*b;
p = Distrib(model, z);
loglik = sum( wgt#((y=1)#log(p) + (y=0)#log(1-p)) );
results[iter, ] = iter || loglik || b`;
w = wgt / (p#(1-p));
f = Density(model, z);
xx = f#x;
xpxi = inv(xx`*(w#xx));
b = b + xpxi*(xx`*(w#(y-p)));
end;
idx = loc(results^=.); /* trim results if few iterations */
results = shape(results[idx],0,2+ncol(parms));
colnames = {"Iter" "LogLik"} || parms;
lbl = "Iteration History: " + model + " Model";
print results[colname=colnames label=lbl];
p0 = sum((y=1)#wgt) / sum(wgt); /* average response */
loglik0 = sum( wgt#((y=1)#log(p0) + (y=0)#log(1-p0)) );
chisq = 2#(loglik-loglik0);
df = k-1;
prob = 1 - cdf("ChiSq", chisq, df);
stats = chisq || df || prob;
print stats[colname={'ChiSq' 'DF' 'Prob'}
label='Likelihood Ratio, Intercept-only Model'];
stderr = sqrt(vecdiag(xpxi));
tRatio = b/stderr;
print (parms`)[label="parms"] b stderr tRatio;
finish;
data={ 7 1.0 0 10, 14 1.0 0 31, 27 1.0 1 56, 51 1.0 3 13,
7 1.7 0 17, 14 1.7 0 43, 27 1.7 4 44, 51 1.7 0 1,
7 2.2 0 7, 14 2.2 2 33, 27 2.2 0 21, 51 2.2 0 1,
7 2.8 0 12, 14 2.8 0 31, 27 2.8 1 22,
7 4.0 0 9, 14 4.0 0 19, 27 4.0 1 16, 51 4.0 0 1};
x = data[, 1:2];
parms = {"Heat" "Soak"};
nReady = data[, 3];
nTotal = data[, 4];
run BinEst(nReady, nTotal, x, "Logit", parms); /* run logit model */
run BinEst(nReady, nTotal, x, "Probit", parms); /* run probit model */