%macro multnorm (version, data=_last_ , /* input data set */ var= , /* REQUIRED: variables for test */ /* May NOT be a list e.g. var1-var10 */ plot=both , /* Create multivar and/or univar plot? */ hires=yes /* Create high-res plots? */ ); %let _version=1.3; %if &version ne %then %put MULTNORM macro Version &_version; %if %sysevalf(&sysver < 7) %then %do; %put MULTNORM: SAS 7 or later is required. Terminating.; %let opts=; %goto exit; %end; %let opts = %sysfunc(getoption(notes)) _last_=%sysfunc(getoption(_last_)); %if &data=_last_ %then %let data=&syslast; options nonotes; /* Check for newer version */ %if %sysevalf(&sysver >= 7) %then %do; filename _ver url 'http://ftp.sas.com/techsup/download/stat/versions.dat'; data _null_; infile _ver; input name:$15. ver; if upcase(name)="&sysmacroname" then call symput("_newver",ver); run; %if &syserr ne 0 %then %put &sysmacroname: Unable to check for newer version; %else %if %sysevalf(&_newver > &_version) %then %do; %put &sysmacroname: A newer version of the &sysmacroname macro is available.; %put %str( ) You can get the newer version at this location:; %put %str( ) http://support.sas.com/ctx/samples/index.jsp; %end; %end; /* Verify that VAR= option is specified */ %if &var= %then %do; %put ERROR: Specify test variables in the VAR= argument; %goto exit; %end; /* Parse VAR= list */ %let _i=1; %do %while (%scan(&var,&_i) ne %str() ); %let arg&_i=%scan(&var,&_i); %let _i=%eval(&_i+1); %end; %let nvar=%eval(&_i-1); /* Remove observations with missing values */ %put MULTNORM: Removing observations with missing values...; data _nomiss; set &data; if nmiss(of &var )=0; run; /* Quit if covariance matrix is singular */ %let singular=nonsingular; %put MULTNORM: Checking for singularity of covariance matrix...; proc princomp data=_nomiss outstat=_evals noprint; var &var ; run; %if &syserr=3000 %then %do; %put MULTNORM: PROC PRINCOMP required for singularity check.; %put %str( Covariance matrix not checked for singularity.); %goto findproc; %end; data _null_; set _evals; where _TYPE_='EIGENVAL'; if round(min(of &var ),1e-8)<=0 then do; put 'ERROR: Covariance matrix is singular.'; call symput('singular','singular'); end; run; %if &singular=singular %then %goto exit; %findproc: /* Is IML or MODEL available for analysis? */ %let mult=yes; %let multtext=%str( and Multivariate); %put MULTNORM: Checking for necessary procedures...; proc model; quit; %if &syserr=0 and %substr(&sysvlong,1,1)>=8 %then %do; %put MULTNORM: Using SAS/ETS PROC MODEL...; %goto model; %end; proc iml; quit; %if &syserr=0 %then %do; %put MULTNORM: Using SAS/IML...; %goto iml; %end; %put MULTNORM: SAS/ETS PROC MODEL with NORMAL option or SAS/IML is required; %put %str( to perform tests of multivariate normality. Univariate); %put %str( normality tests will be done.); %let mult=no; %let multtext=; %goto univar; %iml: proc iml; reset; use _nomiss; read all var {&var} into _x; /* input data */ /* compute mahalanobis distances */ _n=nrow(_x); _p=ncol(_x); _c=_x-j(_n,1)*_x[:,]; /* centered variables */ _s=(_c`*_c)/_n; /* covariance matrix */ _rij=_c*inv(_s)*_c`; /* mahalanobis angles */ /* get values for probability plot and output to data set */ %if %upcase(%substr(&plot,1,1))=M or %upcase(%substr(&plot,1,1))=B %then %do; _d=vecdiag(_rij#(_n-1)/_n); /* squared mahalanobis distances */ _rank=ranktie(_d); /* ranks of distances */ _chi=cinv((_rank-.5)/_n,_p); /* chi-square quantiles */ _chiplot=_d||_chi; create _chiplot from _chiplot [colname={'MAHDIST' 'CHISQ'}]; append from _chiplot; %end; /* Mardia tests based on multivariate skewness and kurtosis */ _b1p=(_rij##3)[+,+]/(_n##2); /* skewness */ _b2p=trace(_rij##2)/_n; /* kurtosis */ _k=(_p+1)#(_n+1)#(_n+3)/(_n#((_n+1)#(_p+1)-6)); /* small sample correction */ _b1pchi=_b1p#_n#_k/6; /* skewness test statistic */ _b1pdf=_p#(_p+1)#(_p+2)/6; /* and df */ _b2pnorm=(_b2p-_p#(_p+2))/sqrt(8#_p#(_p+2)/_n); /* kurtosis test statistic */ _probb1p=1-probchi(_b1pchi,_b1pdf); /* skewness p-value */ _probb2p=2*(1-probnorm(abs(_b2pnorm))); /* kurtosis p-value */ /* output results to data sets */ _names={"Mardia Skewness","Mardia Kurtosis"}; create _names from _names [colname='TEST']; append from _names; _probs=(_n||_b1p||_b1pchi||_probb1p) // (_n||_b2p||_b2pnorm||_probb2p); create _values from _probs [colname={'N' 'VALUE' 'STAT' 'PROB'}]; append from _probs; quit; %if &syserr ne 0 %then %do; %Put MULTNORM: Errors encountered in PROC IML. Terminating.; %goto exit; %end; data _mult; merge _names _values; run; %univar: /* get univariate test results */ proc univariate data=_nomiss noprint; var &var; output out=_stat normal=&var ; output out=_prob probn=&var ; output out=_n n=&var ; run; data _univ; set _stat _prob _n; run; proc transpose data=_univ name=variable out=_tuniv(rename=(col1=stat col2=prob col3=n)); var &var ; run; data _both; length test $15.; set _tuniv %if &mult=yes %then _mult;; if test='' then if n<=2000 then test='Shapiro-Wilk'; else test='Kolmogorov'; run; proc print data=_both noobs split='/'; var variable n test %if &mult=yes %then value; stat prob; format prob pvalue.; title "MULTNORM macro: Univariate&multtext Normality Tests"; label variable="Variable" test="Test" %if &mult=yes %then value="Multivariate/Skewness &/Kurtosis"; stat="Test/Statistic/Value" prob="p-value"; run; %if %upcase(%substr(&plot,1,1))=N %then %goto exit; %if (%upcase(%substr(&plot,1,1))=U or %upcase(%substr(&plot,1,1))=B) and %upcase(%substr(&hires,1,1))=Y %then %do; ods exclude fitquantiles parameterestimates; proc univariate data=_nomiss noprint; hist &var / normal; run; %end; %if %upcase(%substr(&plot,1,1))=M or %upcase(%substr(&plot,1,1))=B %then %if &mult=yes %then %goto plotstep; %else %goto plot; %else %goto exit; %model: /* Multivariate and Univariate tests with MODEL */ ods select normalitytest; proc model data=_nomiss; %do _i=1 %to &nvar; &&arg&_i = _a; %end; fit &var / normal; title "MULTNORM macro: Univariate&multtext Normality Tests"; run; quit; %if &syserr ne 0 %then %do; %put MULTNORM: Errors encountered in PROC MODEL. Terminating.; %goto exit; %end; %if %upcase(%substr(&plot,1,1))=N %then %goto exit; %if (%upcase(%substr(&plot,1,1))=U or %upcase(%substr(&plot,1,1))=B) and %upcase(%substr(&hires,1,1))=Y %then %do; ods exclude fitquantiles parameterestimates; proc univariate data=_nomiss noprint; hist &var / normal; run; %end; %if %upcase(%substr(&plot,1,1))=U %then %goto exit; %plot: /* compute values for chi-square Q-Q plot */ proc princomp data=_nomiss std out=_chiplot noprint; var &var ; run; %if &syserr=3000 %then %do; %put ERROR: PROC PRINCOMP in SAS/STAT needed to do plot.; %goto exit; %end; data _chiplot; set _chiplot; mahdist=uss(of prin1-prin&nvar ); keep mahdist; run; proc rank data=_chiplot out=_chiplot; var mahdist; ranks rdist; run; data _chiplot; set _chiplot nobs=_n; chisq=cinv((rdist-.5)/_n,&nvar); keep mahdist chisq; run; %plotstep: /* Create a chi-square Q-Q plot NOTE: Very large sample size is required for chi-square asymptotics unless the number of variables is very small. */ %if %upcase(%substr(&hires,1,1))=Y %then %if %sysprod(graph)=1 %then %str(proc gplot data=_chiplot;); %else %do; %put MULTNORM: SAS/GRAPH not found. PROC PLOT will be used instead.; proc plot data=_chiplot; %end; %else %str(proc plot data=_chiplot;); plot mahdist*chisq; label mahdist="Squared Distance" chisq="Chi-square quantile"; title "MULTNORM macro: Chi-square Q-Q plot"; run; quit; %exit: options &opts; title; %mend;