Robust Regression and Leverage Points
/****************************************************************/
/* S A S S A M P L E L I B R A R Y */
/* */
/* NAME: ROBUST1 */
/* TITLE: Robust Regression and Leverage Points */
/* PRODUCT: IML */
/* DATA: */
/* */
/* SUPPORT: Rick Wicklin UPDATE: SEP 2013 */
/* REF: */
/* MISC: */
/* */
/****************************************************************/
proc iml;
/* Hertzsprung-Russell Star Data */
/* ObsNum LogTemp LogIntensity */
hr = { 1 4.37 5.23, 2 4.56 5.74, 3 4.26 4.93,
4 4.56 5.74, 5 4.30 5.19, 6 4.46 5.46,
7 3.84 4.65, 8 4.57 5.27, 9 4.26 5.57,
10 4.37 5.12, 11 3.49 5.73, 12 4.43 5.45,
13 4.48 5.42, 14 4.01 4.05, 15 4.29 4.26,
16 4.42 4.58, 17 4.23 3.94, 18 4.42 4.18,
19 4.23 4.18, 20 3.49 5.89, 21 4.29 4.38,
22 4.29 4.22, 23 4.42 4.42, 24 4.49 4.85,
25 4.38 5.02, 26 4.42 4.66, 27 4.29 4.66,
28 4.38 4.90, 29 4.22 4.39, 30 3.48 6.05,
31 4.38 4.42, 32 4.56 5.10, 33 4.45 5.22,
34 3.49 6.29, 35 4.23 4.34, 36 4.62 5.62,
37 4.53 5.10, 38 4.45 5.22, 39 4.53 5.18,
40 4.43 5.57, 41 4.38 4.62, 42 4.45 5.06,
43 4.50 5.34, 44 4.45 5.34, 45 4.55 5.54,
46 4.45 4.98, 47 4.42 4.50 } ;
x = hr[,2]; y = hr[,3];
optn = j(9,1,.);
optn[2]= 1; /* do not print residuals, diagnostics, or history */
optn[3]= 3; /* compute LS, LMS, and weighted LS regression */
ods select LSEst EstCoeff RLSEstLMS;
call lms(sc, coef, wgt, optn, y, x);
ods select all;
r1 = {"Quantile", "Number of Subsets", "Number of Singular Subsets",
"Number of Nonzero Weights", "Objective Function",
"Preliminary Scale Estimate", "Final Scale Estimate",
"Robust R Squared", "Asymptotic Consistency Factor"};
r2 = { "WLS Scale Estimate", "Weighted Sum of Squares",
"Weighted R-squared", "F Statistic"};
sc1 = sc[1:9];
sc2 = sc[11:14];
print sc1[r=r1 L="LMS Information and Estimates"],
sc2[r=r2 L="Weighted Least Squares"];
create HR from HR[c={"ObsNum" "LogTemp" "LogIntensity"}];
append from HR;
close HR;
data HR;
set HR;
if ObsNum in (7, 9, 11, 20, 30, 34) then
Label = putn(ObsNum, "Best4.");
run;
proc sgplot data=HR;
scatter x=LogTemp y=LogIntensity / datalabel=Label;
lineparm x=0 y=6.793 slope=-0.413 / clip curvelabel="LS";
lineparm x=0 y=-12.628 slope=3.97 / clip curvelabel="LMS";
*lineparm x=0 y=-13.624 slope=4.23 / clip curvelabel="LTS";
lineparm x=0 y=-8.5 slope=3.046 / clip curvelabel="WLS";
xaxis offsetmax=0.1;
yaxis offsetmax=0.1;
run;
proc iml;
use HR;
read all var _NUM_ into HR;
close HR;
x = hr[,2]; y = hr[,3];
optn = j(9,1,.);
optn[2]= 3; /* print a maximum amount of information */
optn[3]= 3; /* compute LS, LTS, and weighted LS regression */
ods select BestHalf EstCoeff;
call lts(sc, coef, wgt, optn, y, x);
ods select all;
data HR;
set HR;
if ObsNum in (2,4,6,10,13,15,17,19,21,22,25,27,28,29,
33,35,36,38,39,41,42,43,44,45,46) then
BestSet="Yes";
else BestSet="No ";
label BestSet="Used in Fit";
run;
proc sgplot data=HR;
scatter x=LogTemp y=LogIntensity / Group=BestSet;
lineparm x=0 y=-13.624 slope=4.23 / clip curvelabel="LTS";
xaxis offsetmax=0.1;
yaxis offsetmax=0.1;
run;