## Ridge Regression

``` /****************************************************************/
/*          S A S   S A M P L E   L I B R A R Y                 */
/*                                                              */
/*    NAME: RIDGE                                               */
/*   TITLE: Ridge Regression                                    */
/* PRODUCT: IML                                                 */
/*  SYSTEM: ALL                                                 */
/*    KEYS: MATRIX PPLOT REGR                                   */
/*   PROCS: IML PLOT                                            */
/*    DATA:                                                     */
/*                                                              */
/* SUPPORT: RHD                         UPDATE:                 */
/*     REF:                                                     */
/*    MISC: CONVERTED FROM MATRIX TO IML USING MATIML           */
/****************************************************************/

* THIS RUN DEMONSTRATES ONE APPROACH TO RIDGE REGRESSION.
THIS RUN MAY EASILY BE EXPANDED TO PLOT THE RIDGE TRACES OF
ALL THE B-VALUES AS WELL AS B'B;

DATA;
START=51735717;
DO N=1 TO 50;
U=RANUNI(START)*5;
X1=U+RANNOR(START)*.5;
X2=U+RANNOR(START)*.5;
X3=U+RANNOR(START)*.5;
X4=U+RANNOR(START)*.5;
Y=1+X1+X2+X3+X4+RANNOR(START);
KEEP X1-X4 Y;
OUTPUT;
END;
RUN;

PROC IML;
RESET AUTONAME ;

START MAIN;
*------------------  RIDGE REGRESSION ------------------------*;
N= J(1);
USE _LAST_ ;
READ ALL INTO XY ;
J= NCOL(XY)-1;
N= NROW(XY);
IJ=1:J;
XY=XY- J(N,1)*( J(1,N)*XY* RECIP(N));
C=XY`*XY;
S= DIAG( RECIP( SQRT( VECDIAG(C))));
R=S*C*S;
PRINT R[L="CORRELATION MATRIX"];
SX= S[IJ,IJ];
SY= RECIP( S[J+1,J+1]);
RX= R[IJ,IJ];
RY= R[IJ,J+1];
SKIP 2;
*--------------  OBTAIN OLS ESTIMATES -------------*;
CALL EIGEN( M, E, RX);
GRX=E* DIAG( RECIP( FUZZ(M)))*E`;
B_OLS=GRX*RY;
SSE=1-RY`*GRX*RY;
MSE=SSE* RECIP(N-J-1);
PRINT B_OLS;
TB_OLS=SX*B_OLS*SY;
PRINT TB_OLS[L="OLS ESTIMATES"];
Q= SSQ(B_OLS)-MSE* TRACE(GRX);
PRINT Q;
IF ( Q<=0) THEN  DO;
PRINT  'Q<=0, K NOT DETERMINED';
STOP;
END;
SKIP 2;
*---- SOLVE FOR K SUCH THAT SSQ(BK)=Q, BY NEWTONS METHOD ----;
K=0.5;
L=E`*RY;
IT=0;
LOOP: KJ=K* J(J,1);
IT=IT+1;
IF ( IT>25) THEN GOTO GOTK;
F= SSQ(L# RECIP(M+KJ))-Q;
IF ( ABS(F)<1E-6) THEN GOTO GOTK;
RMK=(M+KJ)#(M+KJ)#(M+KJ);
DF=2* SUM(L#L# RECIP(RMK));
CF=F* RECIP(DF);
K=K+CF;
GOTO LOOP;

GOTK: BK=E* DIAG( RECIP(M+KJ))*E`*RY;
BKB= SSQ(BK);
PRINT K, BK, BKB, IT;
TBK=SX*BK*SY;
PRINT TBK[L="RIDGE ESTIMATES"];

*--------- PLOT THE RIDGE TRACE ---------*;
RT: OK= SSQ(B_OLS)|| 0;
K=0;

LL: K=K+.1;
KJ=K* J(J,1);
SBK= SSQ(E* DIAG( RECIP(M+KJ))*E`*RY);
OK=OK//(SBK||K);
IF ( K<2) THEN GOTO LL;
OK=OK|| J( NROW(OK),1,Q);
_TMP_ROW = 'ROW1    ' : compress('ROW'+char(nrow(OK)));
CREATE KK ( RENAME=(_TMP_ROW=ROW  )) FROM OK [ROWNAME=_TMP_ROW ];
APPEND FROM OK [ROWNAME=_TMP_ROW];

FINISH MAIN;

RUN MAIN;
QUIT;

PROC SGPLOT data=KK;
scatter y=COL2 x=COL1;
refline COL3 / axis=x;
TITLE 'Ridge Trace';
LABEL COL1='B(K)''B(K)' COL2='K value';
RUN;
title;

```