This example shows a regression module that calculates statistics that are associated with a linear regression.
/* Regression Routine */
/* Given X and Y, this fits Y = X B + E */
/* by least squares. */
proc iml;
start reg;
n = nrow(x); /* number of observations */
k = ncol(x); /* number of variables */
xpx = x`*x; /* crossproducts */
xpy = x`*y;
xpxi = inv(xpx); /* inverse crossproducts */
b = xpxi*xpy; /* parameter estimates */
yhat = x*b; /* predicted values */
resid = y-yhat; /* residuals */
sse = resid`*resid; /* sum of squared errors */
dfe = n-k; /* degrees of freedom error */
mse = sse/dfe; /* mean squared error */
rmse = sqrt(mse); /* root mean squared error */
covb = xpxi#mse; /* covariance of estimates */
stdb = sqrt(vecdiag(covb)); /* standard errors */
t = b/stdb; /* ttest for estimates=0 */
probt = 1-probf(t#t,1,dfe); /* significance probability */
print name b stdb t probt;
s = diag(1/stdb);
corrb = s*covb*s; /* correlation of estimates */
print ,"Covariance of Estimates", covb[r=name c=name] ,
"Correlation of Estimates",corrb[r=name c=name] ;
if nrow(tval)=0 then return; /* is a t value specified? */
projx = x*xpxi*x`; /* hat matrix */
vresid = (i(n)-projx)*mse; /* covariance of residuals */
vpred = projx#mse; /* covariance of predicted values */
h = vecdiag(projx); /* hat leverage values */
lowerm = yhat-tval#sqrt(h*mse); /* low. conf lim for mean */
upperm = yhat+tval#sqrt(h*mse); /* upper lim. for mean */
lower = yhat-tval#sqrt(h*mse+mse); /* lower lim. for indiv*/
upper = yhat+tval#sqrt(h*mse+mse);/* upper lim. for indiv */
print ,,"Predicted Values, Residuals, and Limits" ,,
y yhat resid h lowerm upperm lower upper;
finish reg;
/* Routine to test a linear combination of the estimates */
/* given L, this routine tests hypothesis that LB = 0. */
start test;
dfn=nrow(L);
Lb=L*b;
vLb=L*xpxi*L`;
q=Lb`*inv(vLb)*Lb /dfn;
f=q/mse;
prob=1-probf(f,dfn,dfe);
print ,f dfn dfe prob;
finish test;
/* Run it on population of U.S. for decades beginning 1790 */
x= { 1 1 1,
1 2 4,
1 3 9,
1 4 16,
1 5 25,
1 6 36,
1 7 49,
1 8 64 };
y= {3.929,5.308,7.239,9.638,12.866,17.069,23.191,31.443};
name={"Intercept", "Decade", "Decade**2" };
tval=2.57; /* for 5 df at 0.025 level to get 95% conf. int. */
reset fw=7;
run reg;
do;
print ,"TEST Coef for Linear";
L={0 1 0 };
run test;
print ,"TEST Coef for Linear,Quad";
L={0 1 0,0 0 1};
run test;
print ,"TEST Linear+Quad = 0";
L={0 1 1 };
run test;
end;