/* Resources */

/* Using the LEAST= Option with PROC FASTCLUS */

 /****************************************************************/
 /*          S A S   S A M P L E   L I B R A R Y                 */
 /*                                                              */
 /*    NAME: FASTLEAS                                            */
 /*   TITLE: Using the LEAST= Option with PROC FASTCLUS          */
 /* PRODUCT: STAT                                                */
 /*  SYSTEM: ALL                                                 */
 /*    KEYS: CLUSTER                                             */
 /*   PROCS: FASTCLUS SGPLOT SORT                                */
 /*    DATA:                                                     */
 /*                                                              */
 /* SUPPORT:                             UPDATE: January 2010    */
 /*     REF:                                                     */
 /*    MISC:                                                     */
 /*                                                              */
 /****************************************************************/


title 'Cluster Analysis of Artificial Five-Group Data With Outliers';

/* Simulate the analysis data set CLUSDATA.                              */
/*   - Groups g=1..5: normal scatter around a fixed center (xm, ym),     */
/*     with 27, 24, 21, 18 and 15 points respectively (30 - 3*g).        */
/*   - 20 outliers: exponential scatter (scaled by 10) from (5, 5).      */
/*     They carry g=6, the value the DO index holds after the loop ends. */
/*   - RANDOM is a uniform draw written with every row; the script       */
/*     sorts by it afterwards.                                           */
/* The order of the random-number calls is kept exactly as is so the     */
/* generated values do not change.                                       */
data clusdata(drop=i);
   do g = 1 to 5;
      /* Center of the current group */
      select (g);
         when (1)  do; xm = 3;  ym = 6;  end;
         when (2)  do; xm = 3;  ym = 3;  end;
         when (3)  do; xm = 8;  ym = 3;  end;
         when (4)  do; xm = 12; ym = 6;  end;
         otherwise do; xm = 5;  ym = 13; end;
      end;

      /* Group size shrinks by three points per group */
      do i = 1 to 30 - 3*g;
         x = xm + rannor(8237657);
         y = ym + rannor(8237657);
         random = ranuni(8237657);
         output;
      end;
   end;

   /* Outliers, displaced only toward larger x and y from (5, 5) */
   xm = 5;
   ym = 5;
   do i = 1 to 20;
      x = xm + 10*ranexp(8237657);
      y = ym + 10*ranexp(8237657);
      random = ranuni(8237657);
      output;
   end;
run;

/* Reorder CLUSDATA by the uniform RANDOM key so the observation order   */
/* no longer follows the generating group. The data set is named         */
/* explicitly rather than relying on the implicit _LAST_ data set, so    */
/* inserting another step above cannot redirect the sort.                */
proc sort data=clusdata;
   by random;
run;

/* Scatter plot of the simulated points; each point is drawn as its      */
/* generating group number g and colored by g.                           */
proc sgplot data=clusdata noautolegend;
   scatter y=y x=x / markerchar=g group=g;
run;

/* Preliminary clusters for initial seeds */

title2 "Preliminary Clusters";

/* Request up to 25 clusters with MAXITER=0; the cluster seeds are       */
/* written to INIT and the per-observation output to OUT.                */
proc fastclus data=clusdata maxclusters=25 maxiter=0
              out=out outseed=init;
   var x y;
run;

/* Keep only the seeds whose _FREQ_ exceeds 3; INIT is replaced in place. */
data init;
   set init;
   where _freq_ > 3;
run;

* Set up a macro for repeated runs of FASTCLUS followed by PROC SGPLOT;

/* RUNFAST: fit one FASTCLUS model and plot the resulting clusters.      */
/*   clus  - value passed to MAXC= (maximum number of clusters)          */
/*   least - value passed to LEAST= (a number, or MAX)                   */
/* Reads CLUSDATA and the seed data set INIT; overwrites OUT on every    */
/* call and sets TITLE2 to describe the fit.                             */
%macro runfast(clus,least);
   title2 "Fitting &clus Clusters with LEAST=&least";

   /* Start from the seeds in INIT and allow up to 99 iterations */
   proc fastclus least=&least maxc=&clus maxiter=99
        data=clusdata seed=init out=out;
      var x y;
   run;

   /* OUT is named explicitly instead of relying on the implicit _LAST_ */
   /* data set, so the plot always shows the fit produced just above.   */
   proc sgplot data=out noautolegend;
      scatter y=y x=x / markerchar=cluster group=cluster;
   run;
%mend runfast;

/* DRIVER: for every cluster count from &start to &end (inclusive), run  */
/* %RUNFAST with LEAST= 1, 1.5, 2, 5 and MAX.                            */
/*   start - first number of clusters to fit (positional)                */
/*   end   - last number of clusters to fit (positional)                 */
%macro driver(start,end);
   /* Keep the loop index local so an existing CLUS macro variable in   */
   /* the global or an enclosing scope is not overwritten by the %DO.   */
   %local clus;
   %do clus=&start %to &end;
      %runfast(&clus,1);
      %runfast(&clus,1.5);
      %runfast(&clus,2);
      %runfast(&clus,5);
      %runfast(&clus,max);
   %end;
%mend driver;

* The analysis is run for 2 to 6 clusters;
* DRIVER declares START and END as positional parameters, so the values
  are passed by position. Keyword-style arguments (name=value) are
  rejected for parameters that were not defined as keyword parameters;

%driver(2,6);