/* Example 2 for PROC FASTCLUS */
/****************************************************************/
/* S A S   S A M P L E   L I B R A R Y */
/* */
/* NAME: FASTEX2 */
/* TITLE: Example 2 for PROC FASTCLUS */
/* PRODUCT: STAT */
/* SYSTEM: ALL */
/* KEYS: cluster analysis, effect of outliers */
/* PROCS: FASTCLUS, SGPLOT, SGSCATTER */
/* DATA: */
/* */
/* SUPPORT: sasrbk */
/* REF: PROC FASTCLUS, EXAMPLE 2. */
/* MISC: */
/****************************************************************/
title 'Using PROC FASTCLUS to Analyze Data with Outliers';

/* Build the simulated input data set X (variables x and y):        */
/*   - 100 points whose x value is RANNOR + 2                       */
/*   - 100 points whose x value is RANNOR - 2                       */
/*   - 10 widely scattered points (both coordinates scaled by 10)   */
/* The loop counters are working variables only and are dropped.    */
data x;
   drop center n;

   /* The two main groups differ only in the shift applied to x. */
   do center = 2, -2;
      do n = 1 to 100;
         x = rannor(12345) + center;
         y = rannor(12345);
         output;
      end;
   end;

   /* A handful of spread-out points that act as outliers. */
   do n = 1 to 10;
      x = 10 * rannor(12345);
      y = 10 * rannor(12345);
      output;
   end;
run;
title2 'Preliminary PROC FASTCLUS Analysis with 20 Clusters';

/* Preliminary pass: up to 20 clusters, zero iterations, summary    */
/* output only. The cluster seeds are saved in MEAN1 for inspection */
/* and for seed selection in the later steps.                       */
proc fastclus data=x
              maxclusters=20
              maxiter=0
              summary
              outseed=mean1;
   var x y;
run;

/* Plot _GAP_ and _RADIUS_ against _FREQ_ for the preliminary clusters. */
proc sgscatter data=mean1;
   compare y=(_gap_ _radius_)
           x=_freq_;
run;
/* Remove low frequency clusters: keep only the rows of MEAN1 whose */
/* _FREQ_ value is greater than 5. The survivors become the seeds   */
/* for the subsequent analyses.                                     */
data seed;
   set mean1(where=(_freq_ > 5));
run;
title2 'PROC FASTCLUS Analysis Using LEAST= Clustering Criterion';
title3 'Values < 2 Reduce Effect of Outliers on Cluster Centers';

/* Two-cluster analysis started from the retained seeds, using the  */
/* LEAST=1 criterion. Cluster assignments are written to OUT.       */
proc fastclus data=x
              seed=seed
              maxclusters=2
              least=1
              out=out;
   var x y;
run;

/* Scatter plot of the observations, colored by assigned cluster. */
proc sgplot data=out;
   scatter x=x y=y / group=cluster;
run;
/* Second analysis: run PROC FASTCLUS again, taking the seeds from  */
/* the high frequency clusters of the preliminary analysis.         */
/* STRICT= prevents outliers from distorting the results.           */
/* The resulting cluster seeds are saved in MEAN2 for the final     */
/* assignment step.                                                 */
title2 'PROC FASTCLUS Analysis Using STRICT= to Omit Outliers';
proc fastclus data=x
              seed=seed
              maxclusters=2
              strict=3.0
              out=out
              outseed=mean2;
   var x y;
run;

/* Scatter plot of the observations, colored by assigned cluster. */
proc sgplot data=out;
   scatter x=x y=y / group=cluster;
run;
/* Final pass: run PROC FASTCLUS one more time with zero iterations, */
/* seeded from MEAN2, to assign the outliers and tails to clusters.  */
title2 'Final PROC FASTCLUS Analysis Assigning Outliers to Clusters';
proc fastclus data=x
              seed=mean2
              maxclusters=2
              maxiter=0
              out=out;
   var x y;
run;

/* Scatter plot of the final assignment, colored by cluster. */
proc sgplot data=out;
   scatter x=x y=y / group=cluster;
run;