/* Sample Correlation Analysis */
/****************************************************************/
/* S A S   S A M P L E   L I B R A R Y */
/* */
/* NAME: CORSTAND */
/* TITLE: Sample Correlation Analysis */
/* PRODUCT: IML */
/* SYSTEM: ALL */
/* KEYS: MATRIX REGR SUGI6 */
/* PROCS: IML */
/* DATA: */
/* */
/* SUPPORT: Rick Wicklin UPDATE: SEP 2013 */
/* REF: */
/* MISC: */
/* */
/****************************************************************/
proc iml;
/* Standardize data: Assume no column has 0 variance.
   Input:  x - numeric matrix; each column is treated as one variable.
   Return: matrix with the same dimensions as x in which every column has
           been centered to mean zero and scaled to standard deviation 1. */
start stdMat(x);
colMean = mean(x); /* means for columns */
colStd = std(x); /* standard deviation estimates, computed once */
y = (x - colMean) / colStd; /* center to mean zero, then scale to std dev 1 */
return( y );
finish stdMat;
/* Sample data: 6 rows by 3 columns. The matrix x is reused by the
   correlation computations that follow. */
x = { 1 2 3,
3 2 1,
4 2 1,
0 4 1,
24 1 0,
1 3 8};
/* Names used to label the three columns in printed output */
nm = {age weight height};
/* Standardize the columns of x and display the result */
std = stdMat(x);
print std[colname=nm label="Standardized Data"];
/* Compute correlations: Assume no missing values.
   Input:  x - numeric matrix; each column is treated as one variable.
   Return: square matrix obtained by rescaling the corrected sums of
           squares and crossproducts (SSCP) of the columns of x so that
           its diagonal is 1. */
start corrMat(x);
nObs = nrow(x); /* number of observations */
colTotal = x[+,]; /* column sums as a row vector */
rawSSCP = t(x) * x; /* uncorrected crossproducts */
meanAdj = t(colTotal) * colTotal / nObs; /* correction term for the means */
cssp = rawSSCP - meanAdj; /* corrected sscp matrix */
invRoot = diag(1 / sqrt(vecdiag(cssp))); /* diagonal scaling matrix */
corr = invRoot * cssp * invRoot; /* correlation matrix */
return( corr );
finish corrMat;
/* Correlation matrix of the sample data from the SSCP-based module,
   printed with the column names as both row and column labels */
corr = corrMat(x);
print corr[rowname=nm colname=nm label="Correlation Matrix"];
/* Another way to compute correlations: Assume no missing values.
   Input:  x - numeric matrix; each column is treated as one variable.
   Return: crossproduct matrix of the standardized columns of x,
           divided by (number of rows - 1). */
start corrMat2(x);
z = stdMat(x); /* columns standardized by stdMat */
df = nrow(x) - 1; /* divisor: number of rows minus one */
corr = (t(z) * z) / df; /* correlation matrix */
return( corr );
finish corrMat2;
/* Correlation matrix of the sample data via the standardized-columns module */
c = corrMat2(x);