Polynomial Canonical Discriminant Analysis

 /****************************************************************/
 /*          S A S   S A M P L E   L I B R A R Y                 */
 /*                                                              */
 /*    NAME: CANDPOLY                                            */
 /*   TITLE: Polynomial Canonical Discriminant Analysis          */
 /* PRODUCT: STAT                                                */
 /*  SYSTEM: ALL                                                 */
 /*    KEYS: MULTIV DISCRIM                                      */
 /*   PROCS: CANDISC CHART STANDARD                              */
 /*    DATA:                                                     */
 /*                                                              */
 /* SUPPORT: saswfk                UPDATE: April 4, 2007         */
 /*     REF:                                                     */
 /*    MISC:                                                     */
 /*                                                              */
 /****************************************************************/

title 'Polynomial Canonical Discriminant Analysis';

 /* -------------------------------------------------------------
   Discriminant methods based on normal distributions yield linear
   or quadratic discrimination boundaries. However, by generating
   polynomial terms in a DATA step, you can have CANDISC perform
   a higher-order polynomial transformation that makes it possible
   to classify some kinds of non-normal populations effectively
   with linear discrimination boundaries. In the examples below,
   the first polynomial canonical discrimimant variable provides
   perfect or nearly perfect discrimination between classes that
   cannot be separated by ordinary linear discriminant analysis.
 --------------------------------------------------------------- */

title2 'Circles';
data circle; keep x y c;
   c=1;
   do n=1 to 20;
      x=rannor(12345);
      y=rannor(12345);
      output;
   end;
   c=2;
   do n=1 to 30;
      x=rannor(12345);
      y=rannor(12345);
      l=sqrt(x**2+y**2);
      m=1+3/l;
      x=x*m;
      y=y*m;
      output;
   end;
run;

proc sgplot noautolegend;
   scatter y=y x=x / group=c markerchar=c;
run;

data poly;
   set circle;
   x2=x**2; y2=y**2; xy=x*y;
run;

proc candisc out=out;
   class c;
run;

ods graphics on;

proc univariate noprint;
   class c;
   histogram can1;
run;

title2 'arcs';
data arcs(keep=x y c);
   pi = constant('pi');
   a=20;
   c=1;
   m1=0; m2=0; thetam=pi;
   do n=1 to 75;
      theta=rannor(12345)*pi/4+thetam;
      x=a*cos(theta)+m1+rannor(12345);
      y=a*sin(theta)+m2+rannor(12345);
      output;
   end;
   c=2;
   m1=-5; m2=-20; thetam=0;
   do n=1 to 75;
      theta=rannor(12345)*pi/4+thetam;
      x=a*cos(theta)+m1+rannor(12345);
      y=a*sin(theta)+m2+rannor(12345);
      output;
   end;
run;

proc sgplot noautolegend;
   scatter y=y x=x / group=c markerchar=c;
run;

proc standard m=0 s=1 out=std;
   var x y;
run;

data poly;
   set std;
   x2=x**2; xy=x*y; y2=y**2;
   x3=x**3; x2y=x**2*y; xy2=x*y**2; y3=y**3;
run;

proc candisc data=poly out=out;
   class c;
   var x y x2 y2 xy;
run;

proc univariate noprint;
   class c;
   histogram can1;
run;

proc candisc data=poly out=out;
   class c;
run;

proc univariate noprint;
   class c;
   histogram can1;
run;

ods graphics off;