Resources

SAS® High-Performance Analytics Samples

The SAS High-Performance Analytics sample programs and install verification tests can be run only after you edit and submit this file. The file contains site-specific information about your environment so that the procedures can run successfully.

HPFOREST Example (hpfrste3)

/***************************************************************/
/*                                                             */
/*          S A S   S A M P L E   L I B R A R Y                */
/*                                                             */
/*    NAME: hpfrste3.sas                                       */
/*   TITLE: HPFOREST Example (hpfrste3)                        */
/* PRODUCT: HPA                                                */
/*  SYSTEM:                                                    */
/*    KEYS:                                                    */
/*   PROCS: HPFOREST                                           */
/*    DATA: UCI-Machine Learning Repository                    */
/*          Spambase Data Set                                  */
/*                                                             */
/* SUPPORT:                                    UPDATE:         */
/*     REF:                                                    */
/*    MISC: Spambase Example section of the                    */
/*          HPFOREST chapter of HPA.                           */
/*    Number of Variables to Try When Splitting a Node         */
/*                                                             */
/***************************************************************/

   /* Read the comma-delimited Spambase text file into WORK.SPAMBASE.     */
   /* All 58 fields are read as numeric, in file order. The wf_ and cf_   */
   /* prefixes presumably denote word- and character-frequency features   */
   /* (confirm against the UCI Spambase documentation); SPAM is the       */
   /* target used by the HPFOREST runs later in this program.             */
   data spambase;
      infile 'c:\spambase_data.txt' dlm=',';
      input
         wf_make        wf_adress      wf_all         wf_3d
         wf_our         wf_over        wf_remove      wf_internet
         wf_order       wf_mail        wf_receive     wf_will
         wf_people      wf_report      wf_addresses   wf_free
         wf_business    wf_email       wf_you         wf_credit
         wf_your        wf_font        wf_000         wf_money
         wf_hp          wf_hpl         wf_george      wf_650
         wf_lab         wf_labs        wf_telnet      wf_857
         wf_data        wf_415         wf_85          wf_technology
         wf_1999        wf_parts       wf_pm          wf_direct
         wf_cs          wf_meeting     wf_original    wf_project
         wf_re          wf_edu         wf_table       wf_conference
         cf_semicolon   cf_parenthese  cf_bracket
         cf_exclamation cf_dollar      cf_pound
         average        longest        total
         spam
      ;
   run;



%macro hpforest(Vars=);
   /* Train a 200-tree forest on WORK.SPAMBASE with VARS_TO_TRY=&Vars.   */
   /* Vars : value passed to VARS_TO_TRY= (a number or ALL). It is also  */
   /*        used as the suffix of the output table and of the renamed   */
   /*        Miscoob column, so each run yields uniquely named results.  */
   proc hpforest
         data=spambase
         alpha=0.2
         maxtrees=200
         vars_to_try=&Vars;
      /* Every variable whose name starts with w or c, plus the three   */
      /* named variables, is an interval input.                         */
      input w: c: average longest total / level=interval;
      target spam / level=binary;
      /* Capture the fit-statistics table; rename Miscoob so the tables */
      /* from different runs can later be merged side by side.          */
      ods output
         FitStatistics=fitstats_vars&Vars(rename=(Miscoob=VarsToTry&Vars));
   run;
%mend hpforest;

/* Run the forest once per VARS_TO_TRY setting; each call writes its own */
/* fitstats_vars<value> table.                                           */
%hpforest(Vars=all)
%hpforest(Vars=40)
%hpforest(Vars=26)
%hpforest(Vars=7)
%hpforest(Vars=2)

/* Combine the five fit-statistics tables into one wide table.           */
/* There is no BY statement, so this is a one-to-one merge: observations */
/* are matched by position, and each table contributes its own renamed   */
/* VarsToTry<value> column.                                              */
data fitstats;
   merge fitstats_varsall fitstats_vars40 fitstats_vars26
         fitstats_vars7   fitstats_vars2;
   /* Shorter name for the x-axis variable used by the plot. */
   rename Ntrees=Trees;
   /* Labels for the five VarsToTry columns. */
   label
      VarsToTryAll = "Vars=All"
      VarsToTry40  = "Vars=40"
      VarsToTry26  = "Vars=26"
      VarsToTry7   = "Vars=7"
      VarsToTry2   = "Vars=2"
   ;
run;

/* Plot the OOB misclassification rate against the number of trees, one */
/* curve per VARS_TO_TRY setting taken from WORK.FITSTATS.              */
proc sgplot data=fitstats;
   title "Misclassification Rate for Various VarsToTry Values";
   series x=Trees y=VarsToTryAll / lineattrs=(Color=black);
   series x=Trees y=VarsToTry40  / lineattrs=(Pattern=ShortDash Thickness=2);
   /* MediumDash rather than ShortDash: the Vars=40 curve already uses  */
   /* ShortDash with the same thickness, so the two curves would share  */
   /* the same line pattern.                                            */
   series x=Trees y=VarsToTry26  / lineattrs=(Pattern=MediumDash Thickness=2);
   series x=Trees y=VarsToTry7   / lineattrs=(Pattern=MediumDashDotDot Thickness=2);
   series x=Trees y=VarsToTry2   / lineattrs=(Pattern=LongDash Thickness=2);
   yaxis label='OOB Misclassification Rate';
run;
/* Clear the title so it does not carry over to later output. */
title;