Resources

Documentation Example 6 for PROC HPSPLIT

/****************************************************************/
/*          S A S   S A M P L E   L I B R A R Y                 */
/*                                                              */
/*    NAME: HPSPLEX6                                            */
/*   TITLE: Documentation Example 6 for PROC HPSPLIT            */
/*    DESC:                                                     */
/*     REF: None                                                */
/*                                                              */
/* PRODUCT: HPSTAT                                              */
/*  SYSTEM: ALL                                                 */
/*    KEYS:                                                     */
/*   PROCS: HPSPLIT, SORT, SGPLOT                               */
/*                                                              */
/* SUPPORT: saswfk                                              */
/****************************************************************/

/* Location of the UCI machine-learning repository that hosts wine.data */
%let url = http://archive.ics.uci.edu/ml/machine-learning-databases;

/* Read the comma-delimited wine file over the URL access method.       */
/* Each record supplies Cultivar followed by 13 numeric measurements.   */
data Wine;
   infile "&url/wine/wine.data" url delimiter=',';

   input Cultivar
         Alcohol Malic Ash Alkan Mg TotPhen Flav
         NFPhen Cyanins Color Hue ODRatio Proline;

   /* Descriptive labels used in procedure output */
   label
      Cultivar = "Cultivar"
      Alcohol  = "Alcohol"
      Malic    = "Malic Acid"
      Ash      = "Ash"
      Alkan    = "Alkalinity of Ash"
      Mg       = "Magnesium"
      TotPhen  = "Total Phenols"
      Flav     = "Flavonoids"
      NFPhen   = "Nonflavonoid Phenols"
      Cyanins  = "Proanthocyanins"
      Color    = "Color Intensity"
      Hue      = "Hue"
      ODRatio  = "OD280/OD315 of Diluted Wines"
      Proline  = "Proline"
   ;
run;

/* Enable ODS Graphics for the plots requested below */
ods graphics on;

/* Grow an entropy-based classification tree for Cultivar and run the   */
/* CVCC (cross validation cost-complexity) analysis. Only the cross     */
/* validation table and ASE plot are displayed; the table is also saved */
/* to data set p for the 1-SE computations that follow.                 */
proc hpsplit data=Wine seed=15531 cvcc;
   class Cultivar;
   model Cultivar =
         Alcohol Malic Ash Alkan Mg TotPhen Flav
         NFPhen Cyanins Color Hue ODRatio Proline;
   grow  entropy;
   prune costcomplexity;

   ods select CrossValidationValues CrossValidationASEPlot;
   ods output CrossValidationValues=p;
run;

/* Order the cross validation rows from the largest tree to the        */
/* smallest; within rows that share nLeaves, the lowest MiscAverage    */
/* comes first so the next step can keep it with FIRST.nleaves.        */
proc sort data=p;
   by descending nleaves
      MiscAverage;
run;

/* Build the plotting data set: one row per distinct nLeaves, with     */
/* error-bar limits, while tracking the row that has the smallest      */
/* MiscAverage. That row defines the macro variables                   */
/*   yref = MiscAverage + MISCStdErr at the minimum (1-SE threshold)   */
/*   xref = nLeaves at the minimum                                     */
data plot;
   set p;
   by descending nleaves;

   /* Keep only the first row of each nLeaves group */
   if not first.nleaves then delete;

   /* Running minimum of MiscAverage, carried across rows */
   retain yval 1e10;

   /* Upper and lower ends of the one-standard-error bar */
   MiscMax = MiscAverage + MISCStdErr;
   MiscMin = MiscAverage - MISCStdErr;

   /* Strictly smaller value found: record it and refresh both macros */
   if yval > MiscAverage then do;
      yval = MiscAverage;
      call symputx('yref', MiscMax);
      call symputx('xref', nleaves);
   end;
run;

/* 1-SE selection: scan the rows of plot and store in macro variable   */
/* nleaves the nLeaves value of every row whose MiscAverage does not   */
/* exceed &yref. Each qualifying row overwrites the previous value, so */
/* the last qualifying row in data set order determines the result.    */
/* DATA _NULL_ is used because the step only sets a macro variable;    */
/* there is no need to rewrite the plot data set.                      */
data _null_;
   set plot;
   if MiscAverage <= &yref then call symputx('nleaves', nleaves);
run;

/* Add marker coordinates for the two highlighted points. The new      */
/* variables are left missing on every other row, so each overlay      */
/* scatter plot draws a single marker.                                 */
data plot;
   set plot;

   /* Row chosen by the 1-SE rule */
   if nleaves = &nleaves then do;
      xse = nleaves;
      yse = MiscAverage;
   end;

   /* Row with the minimum average misclassification rate */
   if nleaves = &xref then do;
      xmin = nleaves;
      ymin = MiscAverage;
   end;

   /* Compact display of the pruning parameter on the X axis */
   format pruningparameter best6.;
run;

/* Plot average misclassification rate against tree size and pruning   */
/* parameter, with error bars and reference lines at the minimum       */
/* (&xref) and at the 1-SE threshold (&yref).                          */
/* DATA=plot is named explicitly instead of relying on _LAST_, and the */
/* title names the Wine data that is actually analyzed.                */
proc sgplot data=plot noautolegend;
   title 'Cost-Complexity Analysis for Wine Using Cross Validation';

   /* Reference lines: nLeaves at the minimum, and the 1-SE threshold */
   refline &xref / axis=x2 lineattrs=(pattern=shortdash);
   refline &yref / axis=y  name='a' legendlabel='1-SE';

   /* Misclassification curve on the nLeaves (X2) axis */
   series  y=MiscAverage x=nleaves / x2axis lineattrs=graphdata1;

   /* Same values against the pruning parameter, with error bars */
   scatter y=MiscAverage x=pruningparameter / yerrorlower=MiscMin
                         yerrorupper=MiscMax errorbarattrs=graphdata1;

   /* Highlighted points: overall minimum and 1-SE selection */
   scatter y=ymin x=xmin / markerattrs=GraphData2(symbol=circlefilled size=9px)
                           x2axis name='b' legendlabel='Min Misc Rate';
   scatter y=yse  x=xse  / markerattrs=GraphData3(symbol=circlefilled size=9px)
                           x2axis name='c' legendlabel='1-SE Selection';

   xaxis   type=discrete label='Cost-Complexity Parameter' reverse;
   x2axis  type=discrete label='Number of Leaves';
   yaxis   label='Average Misclassification Rate' min=0 max=1;
   keylegend 'a' 'b' 'c' / location=inside across=1 noborder;
run;

/* Clear the global title so it does not carry over to later output */
title;

/* Final tree: refit on Wine and prune by cost complexity to the       */
/* number of leaves stored in macro variable nLeaves.                  */
proc hpsplit data=Wine seed=15531;
   class Cultivar;
   model Cultivar =
         Alcohol Malic Ash Alkan Mg TotPhen Flav
         NFPhen Cyanins Color Hue ODRatio Proline;
   prune costcomplexity(leaves=&nLeaves);
run;