Robust Regression and Leverage Points

/****************************************************************/
/*          S A S   S A M P L E   L I B R A R Y                 */
/*                                                              */
/*    NAME: ROBUST1                                             */
/*   TITLE: Robust Regression and Leverage Points               */
/* PRODUCT: IML                                                 */
/*    DATA:                                                     */
/*                                                              */
/* SUPPORT: Rick Wicklin                UPDATE: SEP 2013        */
/*     REF:                                                     */
/*    MISC:                                                     */
/*                                                              */
/****************************************************************/


proc iml;
/* Hertzsprung-Russell Star Data */
/*  ObsNum LogTemp LogIntensity  */
hr = { 1  4.37  5.23,   2  4.56  5.74,   3  4.26  4.93,
       4  4.56  5.74,   5  4.30  5.19,   6  4.46  5.46,
       7  3.84  4.65,   8  4.57  5.27,   9  4.26  5.57,
      10  4.37  5.12,  11  3.49  5.73,  12  4.43  5.45,
      13  4.48  5.42,  14  4.01  4.05,  15  4.29  4.26,
      16  4.42  4.58,  17  4.23  3.94,  18  4.42  4.18,
      19  4.23  4.18,  20  3.49  5.89,  21  4.29  4.38,
      22  4.29  4.22,  23  4.42  4.42,  24  4.49  4.85,
      25  4.38  5.02,  26  4.42  4.66,  27  4.29  4.66,
      28  4.38  4.90,  29  4.22  4.39,  30  3.48  6.05,
      31  4.38  4.42,  32  4.56  5.10,  33  4.45  5.22,
      34  3.49  6.29,  35  4.23  4.34,  36  4.62  5.62,
      37  4.53  5.10,  38  4.45  5.22,  39  4.53  5.18,
      40  4.43  5.57,  41  4.38  4.62,  42  4.45  5.06,
      43  4.50  5.34,  44  4.45  5.34,  45  4.55  5.54,
      46  4.45  4.98,  47  4.42  4.50 } ;

x = hr[,2]; y = hr[,3];

optn = j(9,1,.);
optn[2]= 1;    /* do not print residuals, diagnostics, or history */
optn[3]= 3;    /* compute LS, LMS, and weighted LS regression */

ods select LSEst EstCoeff RLSEstLMS;
call lms(sc, coef, wgt, optn, y, x);
ods select all;

r1 = {"Quantile", "Number of Subsets", "Number of Singular Subsets",
       "Number of Nonzero Weights", "Objective Function",
       "Preliminary Scale Estimate", "Final Scale Estimate",
       "Robust R Squared", "Asymptotic Consistency Factor"};
r2 = { "WLS Scale Estimate", "Weighted Sum of Squares",
       "Weighted R-squared", "F Statistic"};
sc1 = sc[1:9];
sc2 = sc[11:14];
print sc1[r=r1 L="LMS Information and Estimates"],
      sc2[r=r2 L="Weighted Least Squares"];

create HR from HR[c={"ObsNum" "LogTemp" "LogIntensity"}];
append from HR;
close HR;

data HR;
set HR;
if ObsNum in (7, 9, 11, 20, 30, 34) then
   Label = putn(ObsNum, "Best4.");
run;

proc sgplot data=HR;
   scatter x=LogTemp y=LogIntensity / datalabel=Label;
   lineparm x=0 y=6.793 slope=-0.413 / clip curvelabel="LS";
   lineparm x=0 y=-12.628 slope=3.97 / clip curvelabel="LMS";
   *lineparm x=0 y=-13.624 slope=4.23 / clip curvelabel="LTS";
   lineparm x=0 y=-8.5 slope=3.046 / clip curvelabel="WLS";
   xaxis offsetmax=0.1;
   yaxis offsetmax=0.1;
run;

proc iml;
use HR;
read all var _NUM_ into HR;
close HR;

x = hr[,2]; y = hr[,3];

optn = j(9,1,.);
optn[2]= 3;    /* print a maximum amount of information */
optn[3]= 3;    /* compute LS, LTS, and weighted LS regression */

ods select BestHalf EstCoeff;
call lts(sc, coef, wgt, optn, y, x);
ods select all;

data HR;
set HR;
if ObsNum in (2,4,6,10,13,15,17,19,21,22,25,27,28,29,
              33,35,36,38,39,41,42,43,44,45,46) then
   BestSet="Yes";
else BestSet="No ";
label BestSet="Used in Fit";
run;

proc sgplot data=HR;
   scatter x=LogTemp y=LogIntensity / Group=BestSet;
   lineparm x=0 y=-13.624 slope=4.23 / clip curvelabel="LTS";
   xaxis offsetmax=0.1;
   yaxis offsetmax=0.1;
run;