Resources

SAS® High-Performance Analytics Samples

The SAS High-Performance Analytics sample programs and install verification tests can be run only after you edit and submit this file. The file contains site-specific information about your environment so that the procedures can run successfully.

HPFOREST Example (hpfrste4)

/***************************************************************/
/*                                                             */
/*          S A S   S A M P L E   L I B R A R Y                */
/*                                                             */
/*    NAME: hpfrste4.sas                                       */
/*   TITLE: HPFOREST Example (hpfrste4)                        */
/* PRODUCT: HPA                                                */
/*  SYSTEM:                                                    */
/*    KEYS:                                                    */
/*   PROCS: HPFOREST                                           */
/*    DATA: UCI-Machine Learning Repository                    */
/*          Spambase Data Set                                  */
/*                                                             */
/* SUPPORT:                                    UPDATE:         */
/*     REF:                                                    */
/*    MISC: Spambase Example section of the                    */
/*          HPFOREST chapter of HPA.                           */
/*          Fraction of Training Data To Train A Tree          */
/*                                                             */
/***************************************************************/

   /* Build WORK.SPAMBASE from the comma-delimited Spambase text file.   */
   /* Each record supplies 58 numeric fields, read in the order listed.  */
   data spambase;
      infile 'c:\spambase_data.txt' dlm=',';
      input
         /* 48 variables with the wf_ prefix */
         wf_make wf_adress wf_all wf_3d wf_our wf_over
         wf_remove wf_internet wf_order wf_mail wf_receive wf_will
         wf_people wf_report wf_addresses wf_free wf_business wf_email
         wf_you wf_credit wf_your wf_font wf_000 wf_money
         wf_hp wf_hpl wf_george wf_650 wf_lab wf_labs
         wf_telnet wf_857 wf_data wf_415 wf_85 wf_technology
         wf_1999 wf_parts wf_pm wf_direct wf_cs wf_meeting
         wf_original wf_project wf_re wf_edu wf_table wf_conference
         /* 6 variables with the cf_ prefix */
         cf_semicolon cf_parenthese cf_bracket
         cf_exclamation cf_dollar cf_pound
         /* three summary measures */
         average longest total
         /* last field: used as the binary target by PROC HPFOREST below */
         spam
      ;
   run;



/*
 * hpforest: run PROC HPFOREST on WORK.SPAMBASE once and keep its
 * FitStatistics ODS table.
 *
 *   f=              value substituted into the TRAINFRACTION= option
 *   output_suffix=  text appended to build the output table name
 *                   fitstats_f<suffix>; the Miscoob column of that table
 *                   is renamed to fraction<suffix>
 */
%macro hpforest(f=, output_suffix=);
   proc hpforest data=spambase
                 alpha=0.2
                 maxtrees=200
                 vars_to_try=26
                 trainfraction=&f;
      /* w: and c: are name-prefix lists; in SPAMBASE they select the */
      /* wf_ and cf_ variables respectively.                          */
      input w: c: average longest total / level=interval;
      target spam / level=binary;
      /* The renamed column lets the per-fraction tables be merged    */
      /* side by side later without the columns colliding.            */
      ods output FitStatistics=fitstats_f&output_suffix.(rename=(Miscoob=fraction&output_suffix.));
   run;
%mend hpforest;

/* Three runs, one per training fraction; each leaves behind a table */
/* named fitstats_f<suffix>.                                         */
%hpforest(output_suffix=08, f=0.8);
%hpforest(output_suffix=06, f=0.6);
%hpforest(output_suffix=04, f=0.4);

/* Combine the three per-fraction fit-statistics tables observation by */
/* observation (no BY statement), exposing Ntrees as Trees and giving  */
/* each fraction column a readable label.                              */
data fitstats(rename=(Ntrees=Trees));
   merge fitstats_f08 fitstats_f06 fitstats_f04;
   label
      fraction08 = "Fraction=0.8"
      fraction06 = "Fraction=0.6"
      fraction04 = "Fraction=0.4"
   ;
run;

/* Plot the three fraction columns against the number of trees, one   */
/* series per training fraction, each with its own line pattern.      */
title "Misclassification Rate for Various Fractions of Training Data";
proc sgplot data=fitstats;
   yaxis label='OOB Misclassification Rate';
   series x=Trees y=fraction08 / lineattrs=(pattern=ShortDash thickness=2);
   series x=Trees y=fraction06 / lineattrs=(pattern=MediumDashDotDot thickness=2);
   series x=Trees y=fraction04 / lineattrs=(pattern=LongDash thickness=2);
run;
/* Clear the title so it does not carry over to later output. */
title;