Resources

Sample Correlation Analysis

/****************************************************************/
/*          S A S   S A M P L E   L I B R A R Y                 */
/*                                                              */
/*    NAME: CORSTAND                                            */
/*   TITLE: Sample Correlation Analysis                         */
/* PRODUCT: IML                                                 */
/*  SYSTEM: ALL                                                 */
/*    KEYS: MATRIX  REGR    SUGI6                               */
/*   PROCS: IML                                                 */
/*    DATA:                                                     */
/*                                                              */
/* SUPPORT: Rick Wicklin                UPDATE: SEP 2013        */
/*     REF:                                                     */
/*    MISC:                                                     */
/*                                                              */
/****************************************************************/


/* Invoke the IML procedure; the module definitions and matrix
   statements that follow are IML statements. */
proc iml;
/* Standardize data: center each column of x at its mean and scale it
   by its standard deviation.
   Input : x - numeric matrix, one column per variable.
   Return: matrix with the same dimensions as x holding the
           standardized columns.
   Assume no column has 0 variance (the scaling step divides by the
   column standard deviation). */
start stdMat(x);
   colMean = mean(x);                     /* means for columns */
   colStd  = std(x);     /* column std devs, computed only once */
   y = (x - colMean) / colStd;  /* center, then scale to std 1 */
   return( y );
finish stdMat;

/* Example data: a 6 x 3 numeric matrix (rows are observations,
   columns are the variables named in nm). */
x = { 1 2 3,
      3 2 1,
      4 2 1,
      0 4 1,
     24 1 0,
      1 3 8};
nm = {age weight height};          /* names used as print headings */
std = stdMat(x);                   /* standardize the columns of x */
print std[colname=nm label="Standardized Data"];

/* Correlation matrix of the columns of x, built from the
   mean-corrected sums of squares and crossproducts (SSCP).
   Input : x - numeric matrix, one column per variable.
   Return: square matrix with one row and column per column of x.
   Assume no missing values. */
start corrMat(x);
   nObs = nrow(x);                          /* observation count */
   colTotal = x[+,];                 /* row vector of column sums */
   sscp = x`*x - colTotal`*colTotal/nObs;  /* mean-corrected SSCP */
   invRoot = diag(1/sqrt(vecdiag(sscp)));  /* 1/sqrt of SSCP diag */
   corr = invRoot*sscp*invRoot;         /* rescale rows & columns */
   return( corr );
finish corrMat;

/* Correlation matrix of the sample data, printed with the variable
   names as both row and column headings. */
corr = corrMat(x);
print corr[rowname=nm colname=nm label="Correlation Matrix"];

/* Another way to compute correlations: standardize the columns with
   stdMat, then divide the crossproduct of the standardized data by
   (number of rows - 1).
   Input : x - numeric matrix, one column per variable.
   Return: square matrix with one row and column per column of x.
   Assume no missing values. */
start corrMat2(x);
   z = stdMat(x);                        /* standardized columns */
   dof = nrow(x)-1;                        /* divisor: rows - 1 */
   corr = (z`*z)/dof;                     /* correlation matrix */
   return( corr );
finish corrMat2;

/* Run the standardization-based module on the same data. The result
   is stored in c; no PRINT of c appears in the visible part of the
   file. */
c = corrMat2(x);