Resources

Ridge Regression

 /****************************************************************/
 /*          S A S   S A M P L E   L I B R A R Y                 */
 /*                                                              */
 /*    NAME: RIDGE                                               */
 /*   TITLE: Ridge Regression                                    */
 /* PRODUCT: IML                                                 */
 /*  SYSTEM: ALL                                                 */
 /*    KEYS: MATRIX PPLOT REGR                                   */
 /*   PROCS: IML PLOT                                            */
 /*    DATA:                                                     */
 /*                                                              */
 /* SUPPORT: RHD                         UPDATE:                 */
 /*     REF:                                                     */
 /*    MISC: CONVERTED FROM MATRIX TO IML USING MATIML           */
 /****************************************************************/

 /* THIS RUN DEMONSTRATES ONE APPROACH TO RIDGE REGRESSION.
    IT MAY EASILY BE EXPANDED TO PLOT THE RIDGE TRACES OF
    ALL THE B-VALUES AS WELL AS B'B. */

data;
   /* Simulated sample of 50 observations.  Each regressor X1-X4 is  */
   /* the shared uniform term U plus its own normal draw scaled by   */
   /* 0.5; Y is 1 plus the sum of the regressors plus a normal draw. */
   /* The data set is unnamed, so later steps reach it via _LAST_.   */
   seed = 51735717;
   do obs = 1 to 50;
      u  = 5 * ranuni(seed);
      X1 = u + 0.5 * rannor(seed);
      X2 = u + 0.5 * rannor(seed);
      X3 = u + 0.5 * rannor(seed);
      X4 = u + 0.5 * rannor(seed);
      Y  = 1 + X1 + X2 + X3 + X4 + rannor(seed);
      output;
   end;
   /* Only the regressors and the response are written out. */
   keep X1-X4 Y;
run;

PROC IML;
RESET AUTONAME ;

START MAIN;
 *------------------  RIDGE REGRESSION ------------------------*;
  /* Reads every numeric column of the most recently created data  */
  /* set.  The last column is used as the response and the first   */
  /* J columns as regressors.  The module prints the correlation   */
  /* matrix, OLS and ridge estimates, and writes the data set KK   */
  /* (ROW, COL1=B(K)`B(K), COL2=K, COL3=Q) for the ridge trace.    */
  /* The module has no arguments, so its matrices are global.      */
  USE _LAST_ ;
  READ ALL INTO XY ;
  J= NCOL(XY)-1;
  N= NROW(XY);
  IJ=1:J;
  /* Subtract the column means so C is the corrected cross-products */
  /* matrix, then scale it to the correlation matrix R.             */
  XY=XY- J(N,1)*( J(1,N)*XY* RECIP(N));
  C=XY`*XY;
  S= DIAG( RECIP( SQRT( VECDIAG(C))));
  R=S*C*S;
  PRINT R[L="CORRELATION MATRIX"];
  /* SX and SY convert coefficients from correlation units back to */
  /* the units of the centered data.                               */
  SX= S[IJ,IJ];
  SY= RECIP( S[J+1,J+1]);
  RX= R[IJ,IJ];
  RY= R[IJ,J+1];
  SKIP 2;
  *--------------  OBTAIN OLS ESTIMATES -------------*;
  /* GRX is the inverse of RX built from its eigen-decomposition;  */
  /* M holds the eigenvalues and E the eigenvectors.               */
  CALL EIGEN( M, E, RX);
  GRX=E* DIAG( RECIP( FUZZ(M)))*E`;
  B_OLS=GRX*RY;
  SSE=1-RY`*GRX*RY;
  MSE=SSE* RECIP(N-J-1);
  PRINT B_OLS;
  TB_OLS=SX*B_OLS*SY;
  PRINT TB_OLS[L="OLS ESTIMATES"];
  /* Q is the target value for the squared length of the ridge     */
  /* coefficient vector.                                           */
  Q= SSQ(B_OLS)-MSE* TRACE(GRX);
  PRINT Q;
  /* A non-positive target cannot be matched by any K.  STOP ends  */
  /* the IML program here, before the KK data set is created.      */
  IF ( Q<=0) THEN  DO;
     PRINT  'Q<=0, K NOT DETERMINED';
     STOP;
     END;
  SKIP 2;
  *---- SOLVE FOR K SUCH THAT SSQ(BK)=Q, BY NEWTONS METHOD ----;
  /* F(K) = SSQ(L/(M+K)) - Q and DF = 2*SUM(L#L/(M+K)##3) is the   */
  /* negative of its derivative, so the Newton update is K+F/DF.   */
  /* IT counts evaluations of F; at most MAXIT updates are made.   */
  MAXIT=25;
  TOL=1E-6;
  K=0.5;
  L=E`*RY;
  IT=0;
  DONE=0;
  DO UNTIL (DONE);
     KJ=K* J(J,1);
     IT=IT+1;
     IF ( IT>MAXIT) THEN DONE=1;
     ELSE DO;
        D=M+KJ;
        F= SSQ(L# RECIP(D))-Q;
        IF ( ABS(F)<TOL) THEN DONE=1;
        ELSE DO;
           DF=2* SUM(L#L# RECIP(D#D#D));
           K=K+F* RECIP(DF);
           END;
        END;
     END;
  /* Report the case where the update budget ran out instead of    */
  /* continuing silently with an unverified K.                     */
  IF ( IT>MAXIT) THEN
     PRINT 'CONVERGENCE NOT CONFIRMED AFTER 25 NEWTON UPDATES';

  BK=E* DIAG( RECIP(M+KJ))*E`*RY;
  BKB= SSQ(BK);
  PRINT K, BK, BKB, IT;
  TBK=SX*BK*SY;
  PRINT TBK[L="RIDGE ESTIMATES"];

  *--------- PLOT THE RIDGE TRACE ---------*;
  /* One row per K = 0, 0.1, ..., 2.  The grid is indexed by an    */
  /* integer so the number of rows does not depend on rounding in  */
  /* a running floating-point sum.  Column 3 is preset to Q.       */
  NSTEP=20;
  OK= J(NSTEP+1,3,Q);
  OK[1,1]= SSQ(B_OLS);
  OK[1,2]=0;
  DO IDX=1 TO NSTEP;
     K=IDX/10;
     KJ=K* J(J,1);
     OK[IDX+1,1]= SSQ(E* DIAG( RECIP(M+KJ))*E`*RY);
     OK[IDX+1,2]=K;
     END;
  /* Row labels ROW1..ROWn are written to KK as the variable ROW. */
  _TMP_ROW = 'ROW1    ' : compress('ROW'+char(nrow(OK)));
  CREATE KK ( RENAME=(_TMP_ROW=ROW  )) FROM OK [ROWNAME=_TMP_ROW ];
  APPEND FROM OK [ROWNAME=_TMP_ROW];

FINISH MAIN;

RUN MAIN;
QUIT;

/* Ridge trace: the K value plotted against B(K)'B(K) from data set KK, */
/* with a reference line on the X axis at the value(s) held in COL3.    */
title 'Ridge Trace';
proc sgplot data=KK;
   label COL1='B(K)''B(K)'
         COL2='K value';
   scatter x=COL1 y=COL2;
   refline COL3 / axis=x;
run;
/* Clear the title so it does not carry over to later output. */
title;