Ridge Regression
/****************************************************************/
/* S A S S A M P L E L I B R A R Y */
/* */
/* NAME: RIDGE */
/* TITLE: Ridge Regression */
/* PRODUCT: IML */
/* SYSTEM: ALL */
/* KEYS: MATRIX PPLOT REGR */
/* PROCS: IML PLOT */
/* DATA: */
/* */
/* SUPPORT: RHD UPDATE: */
/* REF: */
/* MISC: CONVERTED FROM MATRIX TO IML USING MATIML */
/****************************************************************/
* THIS RUN DEMONSTRATES ONE APPROACH TO RIDGE REGRESSION.
THIS RUN MAY EASILY BE EXPANDED TO PLOT THE RIDGE TRACES OF
ALL THE B-VALUES AS WELL AS B'B;
DATA;
START=51735717;
DO N=1 TO 50;
U=RANUNI(START)*5;
X1=U+RANNOR(START)*.5;
X2=U+RANNOR(START)*.5;
X3=U+RANNOR(START)*.5;
X4=U+RANNOR(START)*.5;
Y=1+X1+X2+X3+X4+RANNOR(START);
KEEP X1-X4 Y;
OUTPUT;
END;
RUN;
PROC IML;
RESET AUTONAME ;
START MAIN;
*------------------ RIDGE REGRESSION ------------------------*;
N= J(1);
USE _LAST_ ;
READ ALL INTO XY ;
J= NCOL(XY)-1;
N= NROW(XY);
IJ=1:J;
XY=XY- J(N,1)*( J(1,N)*XY* RECIP(N));
C=XY`*XY;
S= DIAG( RECIP( SQRT( VECDIAG(C))));
R=S*C*S;
PRINT R[L="CORRELATION MATRIX"];
SX= S[IJ,IJ];
SY= RECIP( S[J+1,J+1]);
RX= R[IJ,IJ];
RY= R[IJ,J+1];
SKIP 2;
*-------------- OBTAIN OLS ESTIMATES -------------*;
CALL EIGEN( M, E, RX);
GRX=E* DIAG( RECIP( FUZZ(M)))*E`;
B_OLS=GRX*RY;
SSE=1-RY`*GRX*RY;
MSE=SSE* RECIP(N-J-1);
PRINT B_OLS;
TB_OLS=SX*B_OLS*SY;
PRINT TB_OLS[L="OLS ESTIMATES"];
Q= SSQ(B_OLS)-MSE* TRACE(GRX);
PRINT Q;
IF ( Q<=0) THEN DO;
PRINT 'Q<=0, K NOT DETERMINED';
STOP;
END;
SKIP 2;
*---- SOLVE FOR K SUCH THAT SSQ(BK)=Q, BY NEWTONS METHOD ----;
K=0.5;
L=E`*RY;
IT=0;
LOOP: KJ=K* J(J,1);
IT=IT+1;
IF ( IT>25) THEN GOTO GOTK;
F= SSQ(L# RECIP(M+KJ))-Q;
IF ( ABS(F)<1E-6) THEN GOTO GOTK;
RMK=(M+KJ)#(M+KJ)#(M+KJ);
DF=2* SUM(L#L# RECIP(RMK));
CF=F* RECIP(DF);
K=K+CF;
GOTO LOOP;
GOTK: BK=E* DIAG( RECIP(M+KJ))*E`*RY;
BKB= SSQ(BK);
PRINT K, BK, BKB, IT;
TBK=SX*BK*SY;
PRINT TBK[L="RIDGE ESTIMATES"];
*--------- PLOT THE RIDGE TRACE ---------*;
RT: OK= SSQ(B_OLS)|| 0;
K=0;
LL: K=K+.1;
KJ=K* J(J,1);
SBK= SSQ(E* DIAG( RECIP(M+KJ))*E`*RY);
OK=OK//(SBK||K);
IF ( K<2) THEN GOTO LL;
OK=OK|| J( NROW(OK),1,Q);
_TMP_ROW = 'ROW1 ' : compress('ROW'+char(nrow(OK)));
CREATE KK ( RENAME=(_TMP_ROW=ROW )) FROM OK [ROWNAME=_TMP_ROW ];
APPEND FROM OK [ROWNAME=_TMP_ROW];
FINISH MAIN;
RUN MAIN;
QUIT;
PROC SGPLOT data=KK;
scatter y=COL2 x=COL1;
refline COL3 / axis=x;
TITLE 'Ridge Trace';
LABEL COL1='B(K)''B(K)' COL2='K value';
RUN;
title;