%macro corrci(version, data=, corr=, out=, byvar=);
  /*--------------------------------------------------------------------
    CORRCI - p-values and Fisher-z 95% confidence limits for the
             correlations stored in a correlation data set.

    Parameters
      version  (positional) any non-blank value prints the version banner.
      data=    raw data set the correlations were computed from.  It is
               only used to count, for every pair of variables, the
               observations in which both values are non-missing.
      corr=    correlation data set containing _TYPE_ and _NAME_ (rows
               with _TYPE_='CORR' are used; presumably a PROC CORR OUTP=
               data set - confirm against the caller).
      out=     output data set.  It is also used as the working data set
               and is overwritten several times along the way.
      byvar=   optional, a SINGLE BY variable present in both data= and
               corr=.  When omitted, a dummy variable _byvar_ is used
               internally and dropped from the result.

    Output (one row per pair of distinct variables, upper triangle)
      &byvar (only when supplied), var1, var2, corr, n, pvalue,
      fishersz, sigmaz, l95, u95.
      The interval columns stay missing when n <= 3, corr is missing or
      abs(corr) = 1.

    Assumptions
      * data= and corr= list the BY groups in the same order; the pair
        counts are attached to the correlations by position, not by key.
      * every BY group of corr= holds the same variables, in the same
        order, as the first BY group.
      * n is a pairwise count, i.e. the correlations are taken to be
        based on pairwise deletion of missing values.
      * the analysis variables are not called var1, var2, corr, cond or
        _check_, and do not start with _n followed by digits.
  --------------------------------------------------------------------*/
  %local check n dim i row col idx;

  %if &version ne %then %put CORRCI macro Version 1.0;

  /* check=1 flags that no BY variable was given; a dummy is used instead */
  %if %length(&byvar) = 0 %then %do;
    %let check = 1;
    %let byvar = _byvar_;
  %end;
  %else %let check = 0;

  /* Keep only the correlation rows.  var1/var2 are declared here with
     room for a full 32-character variable name; a shorter length would
     truncate the names compared in the restructuring step below. */
  data &out;
    set &corr;
    %if &check %then %do;
      &byvar = 1;
    %end;
    length var1 var2 $32;
    call missing(var1, var2);
    if _type_ = 'CORR';
  run;

  /* Variable names (var1..varN), their number (n) and the size of the
     upper triangle including the diagonal (dim).  Only the first BY
     group is inspected. */
  %let n   = 0;
  %let dim = 0;
  data _null_;
    set &out end=eof;
    retain _check_;
    if _n_ = 1 then _check_ = &byvar;
    if &byvar = _check_ and _name_ ne ' ' then do;
      n + 1;
      call symputx(cats('var', n), _name_, 'L');
    end;
    if eof then do;
      call symputx('n', n, 'L');
      call symputx('dim', n * (n + 1) / 2, 'L');
    end;
  run;

  %if &n = 0 %then %do;
    %put ERROR: CORRCI found no rows with _TYPE_=CORR and a non-blank _NAME_ in &corr..;
    %return;
  %end;

  /* Matrix -> one row per pair.  For the row of variable k the pairs
     (k,k), (k,k+1), ..., (k,N) are written: cond switches on when the
     diagonal element is reached and is reset for every input row. */
  data &out;
    set &out;
    keep &byvar var1 var2 corr;
    cond = 0;
    if _type_ = 'CORR' then do;
      %do i = 1 %to &n;
        corr = &&var&i;
        var1 = _name_;
        var2 = "&&var&i";
        if var1 = var2 then cond = 1;
        if cond then output;
      %end;
    end;
  run;

  /* Pairwise non-missing counts, one output row per BY group.  Counter
     _n<idx> belongs to the idx-th pair in the same row-major upper
     triangle order used above.  MISSING() is used so that special
     missing values (.A-.Z, ._) are not counted as valid data. */
  data work._corrci_n_(rename=(_check_=&byvar));
    array _cnt_{&dim} _n1-_n&dim;
    retain _check_ _n1-_n&dim;
    keep _check_ _n1-_n&dim;
    set &data end=eof;
    %if &check %then %do;
      &byvar = 1;
    %end;
    if _n_ = 1 then do;
      _check_ = &byvar;
      do _k_ = 1 to &dim;
        _cnt_[_k_] = 0;
      end;
    end;
    else if _check_ ne &byvar then do;
      /* a new BY group starts: flush the finished one and start over */
      output;
      _check_ = &byvar;
      do _k_ = 1 to &dim;
        _cnt_[_k_] = 0;
      end;
    end;
    %let idx = 0;
    %do row = 1 %to &n;
      %do col = &row %to &n;
        %let idx = %eval(&idx + 1);
        _n&idx = _n&idx + (not missing(&&var&row) and not missing(&&var&col));
      %end;
    %end;
    if eof then output;
  run;

  /* one row per (BY group, pair); the count ends up in COL1 */
  proc transpose data=work._corrci_n_ out=work._corrci_n_(drop=_name_);
    by &byvar;
    var _n1-_n&dim;
  run;

  /* Attach n to every correlation (positional one-to-one merge, there
     is deliberately no BY statement) and compute the two-sided p-value
     of the t test for rho = 0. */
  data &out;
    merge &out(in=_in_corr_)
          work._corrci_n_(in=_in_n_ rename=(col1=n))
          end=_eof_;
    retain _mismatch_ 0;
    drop _mismatch_;
    %if &check %then %do;
      drop _byvar_;
    %end;
    if _in_corr_ ne _in_n_ then _mismatch_ = 1;
    if _eof_ and _mismatch_ then
      put "WARNING: CORRCI: &corr and &data yield different numbers of pairs, n may be misaligned.";
    if missing(corr) or n <= 2 then pvalue = .;
    else if abs(corr) = 1 then pvalue = 0.0;
    else pvalue = 2 * (1 - probt(abs(corr / sqrt(1 - corr * corr) * sqrt(n - 2)), n - 2));
  run;

  /* remove the scratch data set */
  proc datasets library=work nolist;
    delete _corrci_n_;
  quit;

  /* 95% confidence limits via the Fisher z transformation; the diagonal
     (a variable with itself) is dropped. */
  data &out;
    set &out;
    if var1 = var2 then delete;
    if n > 3 and not missing(corr) then do;
      if abs(corr) < 1 then do;
        fishersz = 0.5 * (log(1 + corr) - log(1 - corr));
        sigmaz   = 1 / sqrt(n - 3);
        /* tanh is the inverse of the Fisher z transformation */
        l95 = tanh(fishersz - 1.96 * sigmaz);
        u95 = tanh(fishersz + 1.96 * sigmaz);
      end;
    end;
  run;
%mend;