MODECLUS Analysis of Artificial Data Sets

 /****************************************************************/
 /*          S A S   S A M P L E   L I B R A R Y                 */
 /*                                                              */
 /*    NAME: MODECLU6                                            */
 /*   TITLE: MODECLUS Analysis of Artificial Data Sets           */
 /* PRODUCT: SAS                                                 */
 /*  SYSTEM: ALL                                                 */
 /*    KEYS: CLUSTER                                             */
 /*   PROCS: MODECLUS SGPLOT                                     */
 /*    DATA:                                                     */
 /*                                                              */
 /* SUPPORT: saswfk                      UPDATE:                 */
 /*     REF:                                                     */
 /*    MISC:                                                     */
 /*                                                              */
 /****************************************************************/

title 'Modeclus Analysis';

data compact;
   keep x y;
   n=50; scale=1;
   mx=0; my=0; link generate;
   mx=8; my=0; link generate;
   mx=4; my=8; link generate;
   stop;
generate:
   do i=1 to n;
      x=rannor(1)*scale+mx;
      y=rannor(1)*scale+my;
      output;
   end;
   return;
run;

proc modeclus data=compact m=6 k=10 to 50 by 5 short;
   title2 'of Data Containing Well-Separated, Compact Clusters';
run;

proc modeclus data=compact m=6 k=20 out=out;
run;

proc sgplot;
   scatter y=y x=x / markerchar=cluster;
run;

*---------------------------------------------------------------------;

data closer;
   keep x y;
   n=50; scale=1;
   mx=0; my=0; link generate;
   mx=3; my=0; link generate;
   mx=1; my=2; link generate;
   stop;
generate:
   do i=1 to n;
      x=rannor(9)*scale+mx;
      y=rannor(9)*scale+my;
      output;
   end;
   return;
run;

proc modeclus data=closer m=6 k=10 to 50 by 5 short;
   title2 'of Data Containing Poorly-Separated, Compact Clusters';
run;

proc modeclus data=closer m=6 k=20 out=out;
run;

proc sgplot;
   scatter y=y x=x / markerchar=cluster;
run;

*---------------------------------------------------------------------;

data unequal;
   keep x y;
   mx=1; my=0; n=20; scale=.5; link generate;
   mx=6; my=0; n=80; scale=2.; link generate;
   mx=3; my=4; n=40; scale=1.; link generate;
   stop;
generate:
   do i=1 to n;
      x=rannor(1)*scale+mx;
      y=rannor(1)*scale+my;
      output;
   end;
   return;
run;

proc modeclus data=unequal m=6 k=10 to 50 by 5 short;
   title2 'of Data Containing Compact Clusters of Unequal Size';
run;

proc modeclus data=unequal m=6 k=20 out=out;
run;

proc sgplot;
   scatter y=y x=x / markerchar=cluster;
run;

*---------------------------------------------------------------------;

data elongate;
   keep x y;
   ma=8; mb=0; link generate;
   ma=6; mb=8; link generate;
   stop;
generate:
   do i=1 to 50;
      a=rannor(7)*6+ma;
      b=rannor(7)+mb;
      x=a-b;
      y=a+b;
      output;
   end;
   return;
run;

proc modeclus data=elongate m=6 k=10 to 50 by 5 short;
   title2 'of Data Containing Parallel Elongated Clusters';
run;

proc modeclus data=elongate m=6 k=20 out=out;
run;

proc sgplot;
   scatter y=y x=x / markerchar=cluster;
run;

*---------------------------------------------------------------------;

data irreg;
   keep x y;
   do i=1 to 100;
      a=i*.0628319;
      x=cos(a)+(i>50)+rannor(7)*.1;
      y=sin(a)+(i>50)*.3+rannor(7)*.1;
      output;
   end;
run;

proc modeclus data=irreg m=6 k=10 to 50 by 5 cascade=1 short;
   title2 'of Data Containing Irregular Clusters';
run;

proc modeclus data=irreg m=6 k=20 cascade=1 out=out;
run;

proc sgplot;
   scatter y=y x=x / markerchar=cluster;
run;