## Modules for Plotting Robust Regression Results

```/****************************************************************/
/*          S A S   S A M P L E   L I B R A R Y                 */
/*                                                              */
/*    NAME: ROBUSTMC                                            */
/*   TITLE: Modules for Plotting Robust Regression Results      */
/* PRODUCT: IML                                                 */
/*  SYSTEM: ALL                                                 */
/*                                                              */
/* SUPPORT: Rick Wicklin                UPDATE: Jun 2016        */
/*     REF:                                                     */
/*    MISC:                                                     */
/* Modules: MVESCATTER - PLOTTING MVE RESULTS                   */
/*          MCDSCATTER - PLOTTING MCD RESULTS                   */
/*          RDPLOT - PLOTTING LMS AND LMT RESULTS               */
/*          CalcEllipsesFromCov - supports RESET NOEIGEN93      */
/****************************************************************/

proc iml;

/*------------------------------------------------------------*/
start CalcEllipsesFromCov(ellipse, mAxes, ConfLevel, numObs,
mCenter, mCov, options );
/* This module computes confidence ellipses for mean or prediction
from a 2x2 covariance matrix.
Output matrices:
ellipse    a numEllipsePts x (2*ncol(ConfLevel)) matrix that
contains the ellipses in parametric form.  There
are numEllipsePts in the parameterization.
Each pair of columns ellipse[,2*i-1:2*i] gives the
X and Y coordinates of the ellipse for confidence
level ConfLevel[i].
mAxes      a 4x(2*ncol(ConfLevel)) matrix containing the major
and minor axes of the ellipses.  Each pair of columns
m = mAxes[,2*i-1:2*i] describes the axes for that
ellipse. The major axis goes from m[1,] to m[2,] and
the minor axis goes from m[3,] to m[4,].

Input parameters:
ConfLevel  a 1xn vector of (1-alpha) confidence levels that will
determine the ellipses. Each entry is
0 < ConfLevel[i] < 1.  For example, 0.95 produces the
95% confidence ellipse for alpha=0.05.
numObs     The number of nonmissing observations in the data
underlying the covariance matrix.  Require numObs>2.
mCenter    a 1x2 vector containing an estimate of the (X,Y)
center of the data. This can be the mean of X and Y,
or any robust estimate.
mCov       a 2x2 symmetric covariance matrix. This can be the
standard covariance matrix or a robust estimate of
the covariance.
options    options[1]=1  ==> compute prediction ellipse
otherwise     ==> compute confidence ellipse for mean
*/

numEllipsePts = 100;

/* Eigenvectors stored in columns. Symmetry==>pos real eigenvalues */
call eigen(eval,evec,mCov);

/* Standardize eigenvectors: put e1 in half space y>0 */
if evec[2,1] < 0 then
evec[,1] = -evec[,1];
/* Put e2 so that it is in clockwise direction from e1 */
if det(evec) < 0 then
evec[,2] = -evec[,2];

/* handle numeric zero */
if eval[1] <= 0 then da = 0.;
else da = sqrt(eval[1]);
if eval[2] <= 0 then db = 0.;
else db = sqrt(eval[2]);

/* Get orthogonal rotation matrix.
Since length of eigenvectors is 1:
evec[,1] = <cos(alpha), sin(alpha)> where alpha is direction angle
rot = [cos(alpha)   -sin(alpha)]
[sin(alpha)    cos(alpha)]
Then rot*{x,y} is rotation of {x,y} by alpha.  If {x y} is row vector,
use {x y}*rot`
Because evec is in standard position, rot = evec */
rot = evec;

RX = da * T(evec[,1]); /* row vector: lambda1*evec[,1] */
RY = db * T(evec[,2]); /* row vector: lambda2*evec[,2] */
axes = j(4,2);
axes[1,] = -RX; /* negative semi-major axis */
axes[2,] =  RX; /* positive semi-major axis */
axes[3,] = -RY; /* negative semi-minor axis */
axes[4,] =  RY; /* positive semi-minor axis */

/* see documentation for CORR */
c = 2*(numObs-1) / (numObs*(numObs-2)); /* mean */
if options=1 then
c = c * (numObs+1);                  /* predictions */
/* note ConfLevel = 1-alpha */
F = sqrt( c * quantile( "F", ConfLevel, 2, numObs-2 ) );

/* Points of ellipsoid */
twoPi = 2*constant("pi");
phi = twoPi * T(0:numEllipsePts-1) / (numEllipsePts-1);
x = cos(phi);
y = sin(phi);

p = rowvec(ConfLevel);
ellipse = j(numEllipsePts, 2*ncol(p));
mAxes = j(4, 2*ncol(p));
ellipseCenter = j(numEllipsePts,1,1) * mCenter;
axesCenter = j(4,1,1) * mCenter;
do i = 1 to ncol(p);
ellipse[,(2*i-1):2*i] = F[i] # (da*x || db*y) * rot` + ellipseCenter;
mAxes[,(2*i-1):2*i] = F[i] # axes + axesCenter;
end;
finish;

/*------------------------------------------------------------*/

start MVEScatter(a, optn, prob=0.9, vnam=,
titl="Estimates of Location and Scale");
if IsSkipped(vnam) then
varnames = "x1":("x"+strip(char(ncol(a))));
else varnames = vnam;
call RobustScatter("MVE", a, optn, prob, varnames, titl);
finish;

/*------------------------------------------------------------*/

start MCDScatter(a, optn, prob=0.9, vnam=,
titl="Estimates of Location and Scale");
if IsSkipped(vnam) then
varnames = "x1":("x"+strip(char(ncol(a))));
else varnames = vnam;
call RobustScatter("MCD", a, optn, prob, varnames, titl);
finish;

/*------------------------------------------------------------*/

start RobustScatter(type, a, optn, prob=0.9, varnames,
titl="Estimates of Location and Scale");
p = ncol(a);
numObs = nrow(a);
muc = mean(a); /* classical estimates */
covc= cov(a);

if upcase(type)="MVE" then
CALL MVE(sc,xm,dist,optn,a);
else
CALL MCD(sc,xm,dist,optn,a);
mur = xm[1,]; /* robust estimates */
covr = xm[3:2+p,];
wgts = dist[3,];

do j = 2 to p;
do i = 1 to j-1;
x=a[,i]; y=a[,j];
mu2 = mur[i] || mur[j];
cov2 = ( covr[i,i] || covr[i,j] ) //
( covr[j,i] || covr[j,j] );
run CalcEllipsesFromCov(ellipse, mAxes, prob,
numObs, mu2, cov2, 1);
ex = ellipse[,1];      ey = ellipse[,2];
create _dta var {"x" "y" "wgts" "ex" "ey"};
append;
close _dta;
labx = varnames[i]; laby = varnames[j];
submit labx laby titl;
data _dta;
set _dta;
label x="&labx" y="&laby";
if wgts=0 then do;
Outlier="Yes";  ID=_N_;
end;
else do;
Outlier="No";  ID=.;
end;
run;

title "&titl";
proc sgplot data=_dta;
styleattrs datacolors=(Blue Red)
datasymbols=(CircleFilled X);
scatter x=x y=y / markerattrs=(size=10)
datalabel=ID
group=Outlier nomissinggroup name="scat";
ellipse x=x y=y / alpha=0.1 lineattrs=(pattern=dash)
name="classic"
legendlabel="Classical 90% Prediction";
series x=ex y=ey /name="robust"
legendlabel="Robust 90% Prediction";
keylegend "scat" /location=inside position=bottomright
across=1 title="Outlier";
keylegend "classic" "robust";
run;
title;
endsubmit;
end;
end;
call delete(_dta);
finish RobustScatter;

/*------------------------------------------------------------*/

start RDPlot(method,LMSOpt,MCDOpt,y,x);
call mcd(sc,xmve,dist,MCDOpt,x);
if upcase(method)="LMS" then
call lms(sc, coef, wgt, LMSOpt, y, x);
else
call lts(sc, coef, wgt, LMSOpt, y, x);

RobResid = wgt[2,]; /* robust residuals */
MD = dist[1,];      /* Mahalanobis distance (MD) */
RD = dist[2,];      /* Robust distance (RD) */

Outlier = (abs(RobResid)>2.5);
cutoff = sqrt(quantile("chisq",0.975, ncol(x)));
Leverage = ^dist[3,]; /* indicator variable for leverage pts */
/* classify each point:
0 = regular point
1 = outlier
2 = leverage
3 = outlier & leverage */
Both = Outlier # Leverage;
Type = choose(Leverage, 2, Outlier);
Type = choose(Both, 3, Type);
create _rdplot0 var {"RobResid" MD RD "Type"};
append;
close _rdplot0;

submit cutoff;
proc format;
value TypeFmt 0 = "Regular"
1 = "Outlier"
2 = "Leverage"
3 = "Outlier and Leverage";
run;

data _Attrs;
length Value \$20 MarkerColor \$20;
ID = "RDAttr";
Value= putn(0,"TypeFmt."); MarkerColor = "Black       "; output;
Value= putn(1,"TypeFmt."); MarkerColor = "DarkRed     "; output;
Value= putn(2,"TypeFmt."); MarkerColor = "DarkBlue    "; output;
Value= putn(3,"TypeFmt."); MarkerColor = "DarkGreen   "; output;
run;

data _rdplot;
label RobResid="Standardized Robust Residual"
MD="Mahalanobis Distance"
RD="Robust Distance";
format Type TypeFmt.;
set _rdplot0;
if Type>0 then ID=_N_;
run;

title "Outlier and Leverage Diagnostics";
proc sgplot data=_rdplot DATTRMAP=_Attrs;
scatter x=RD y=RobResid / datalabel=ID
markerattrs=(size=10 symbol=CircleFilled)
transparency=0.3 group=Type
attrid=RDAttr GROUPORDER=ascending;
refline &cutoff / axis=x;
refline -2.5 2.5 / axis=y;
run;
title;
endsubmit;

call delete({_Attrs _rdplot0});
finish RDPlot;

/*------------------------------------------------------------*/

store module=_all_;
quit;

```