Example 9.3 Regression

This example shows a regression module that calculates statistics that are associated with a linear regression.

   /*         Regression Routine               */
   /* Given X and Y, this fits Y = X B + E    */
   /* by least squares.                        */
proc iml;
start reg;
   n = nrow(x);                    /* number of observations */
   k = ncol(x);                       /* number of variables */
   xpx = x`*x;                              /* crossproducts */
   xpy = x`*y;    
   xpxi = inv(xpx);                 /* inverse crossproducts */
   b = xpxi*xpy;                      /* parameter estimates */
   yhat = x*b;                           /* predicted values */
   resid = y-yhat;                              /* residuals */
   sse = resid`*resid;              /* sum of squared errors */
   dfe = n-k;                    /* degrees of freedom error */
   mse = sse/dfe;                      /* mean squared error */
   rmse = sqrt(mse);              /* root mean squared error */
   covb = xpxi#mse;               /* covariance of estimates */
   stdb = sqrt(vecdiag(covb));            /* standard errors */
   t = b/stdb;                      /* ttest for estimates=0 */
   probt = 1-probf(t#t,1,dfe);   /* significance probability */
   print name b stdb t probt;  
   s = diag(1/stdb);           
   corrb = s*covb*s;             /* correlation of estimates */
   print ,"Covariance of Estimates", covb[r=name c=name] ,
          "Correlation of Estimates",corrb[r=name c=name] ;

   if nrow(tval)=0 then return;   /* is a t value specified? */
   projx = x*xpxi*x`;                          /* hat matrix */
   vresid = (i(n)-projx)*mse;     /* covariance of residuals */
   vpred = projx#mse;     /* covariance of predicted values  */
   h = vecdiag(projx);                /* hat leverage values */
   lowerm = yhat-tval#sqrt(h*mse); /* low. conf lim for mean */
   upperm = yhat+tval#sqrt(h*mse);    /* upper lim. for mean */
   lower = yhat-tval#sqrt(h*mse+mse); /* lower lim. for indiv*/
   upper = yhat+tval#sqrt(h*mse+mse);/* upper lim. for indiv */
   print ,,"Predicted Values, Residuals, and Limits" ,,
   y yhat resid h lowerm upperm lower upper;
finish reg;

   /* Routine to test a linear combination of the estimates  */
   /* given L, this routine tests hypothesis that LB = 0.    */

start test;
   dfn=nrow(L);
   Lb=L*b;
   vLb=L*xpxi*L`;
   q=Lb`*inv(vLb)*Lb /dfn;
   f=q/mse;
   prob=1-probf(f,dfn,dfe);
   print ,f dfn dfe prob;
finish test;

  /* Run it on population of U.S. for decades beginning 1790 */

x= { 1 1 1,
     1 2 4,
     1 3 9,
     1 4 16,
     1 5 25,
     1 6 36,
     1 7 49,
     1 8 64 };

y= {3.929,5.308,7.239,9.638,12.866,17.069,23.191,31.443};
name={"Intercept", "Decade", "Decade**2" };
tval=2.57;  /* for 5 df at 0.025 level to get 95% conf. int. */
reset fw=7;
run reg;
do;
   print ,"TEST Coef for Linear";
   L={0 1 0 };
   run test;
   print ,"TEST Coef for Linear,Quad";
   L={0 1 0,0 0 1};
   run test;
   print ,"TEST Linear+Quad = 0";
   L={0 1 1 };
   run test;
end;

The results are shown in Output 9.3.1.

Output 9.3.1: Regression Results

name b stdb t probt
Intercept 5.06934 0.96559 5.24997 0.00333
Decade -1.1099 0.4923 -2.2546 0.07385
Decade**2 0.53964 0.0534 10.106 0.00016

Covariance of Estimates

covb
  Intercept Decade Decade**2
Intercept 0.93237 -0.4362 0.04277
Decade -0.4362 0.24236 -0.0257
Decade**2 0.04277 -0.0257 0.00285

Correlation of Estimates

corrb
  Intercept Decade Decade**2
Intercept 1 -0.9177 0.8295
Decade -0.9177 1 -0.9762
Decade**2 0.8295 -0.9762 1

Predicted Values, Residuals, and Limits

y yhat resid h lowerm upperm lower upper
3.929 4.49904 -0.57 0.70833 3.00202 5.99606 2.17419 6.82389
5.308 5.00802 0.29998 0.27976 4.06721 5.94883 2.99581 7.02023
7.239 6.59627 0.64273 0.23214 5.73926 7.45328 4.62185 8.57069
9.638 9.26379 0.37421 0.27976 8.32298 10.2046 7.25158 11.276
12.866 13.0106 -0.1446 0.27976 12.0698 13.9514 10.9984 15.0228
17.069 17.8367 -0.7677 0.23214 16.9797 18.6937 15.8622 19.8111
23.191 23.742 -0.551 0.27976 22.8012 24.6828 21.7298 25.7542
31.443 30.7266 0.71638 0.70833 29.2296 32.2236 28.4018 33.0515

TEST Coef for Linear

f dfn dfe prob
5.08317 1 5 0.07385

TEST Coef for Linear,Quad

f dfn dfe prob
666.511 2 5 8.54E-7

TEST Linear+Quad = 0

f dfn dfe prob
1.67746 1 5 0.25184