SAS® High-Performance Analytics Samples
The SAS High-Performance Analytics sample programs and install verification tests can be run only after you
edit and submit this file.
The file contains site-specific information about your environment so that the procedures can run successfully.
HPFOREST Example (hpfrste4)
/***************************************************************/
/* */
/* S A S   S A M P L E   L I B R A R Y */
/* */
/* NAME: hpfrste4.sas */
/* TITLE: HPFOREST Example (hpfrste4) */
/* PRODUCT: HPA */
/* SYSTEM: */
/* KEYS: */
/* PROCS: HPFOREST */
/* DATA: UCI-Machine Learning Repository */
/* Spambase Data Set */
/* */
/* SUPPORT: UPDATE: */
/* REF: */
/* MISC: Spambase Example section of the */
/* HPFOREST chapter of HPA. */
/* Fraction of Training Data To Train A Tree */
/* */
/***************************************************************/
/* Build WORK.SPAMBASE from the comma-delimited text file.            */
/* List input reads 58 numeric fields per record, in this order:      */
/*   48 wf_ variables, 6 cf_ variables, then average, longest, total, */
/*   and finally spam.                                                */
/* NOTE(review): wf_/cf_ look like word/character frequencies from    */
/* the UCI Spambase layout - confirm against the data dictionary.     */
data spambase;
    infile 'c:\spambase_data.txt' dlm=',';

    input
        /* wf_ block (48 variables) */
        wf_make        wf_adress      wf_all         wf_3d
        wf_our         wf_over        wf_remove      wf_internet
        wf_order       wf_mail        wf_receive     wf_will
        wf_people      wf_report      wf_addresses   wf_free
        wf_business    wf_email       wf_you         wf_credit
        wf_your        wf_font        wf_000         wf_money
        wf_hp          wf_hpl         wf_george      wf_650
        wf_lab         wf_labs        wf_telnet      wf_857
        wf_data        wf_415         wf_85          wf_technology
        wf_1999        wf_parts       wf_pm          wf_direct
        wf_cs          wf_meeting     wf_original    wf_project
        wf_re          wf_edu         wf_table       wf_conference

        /* cf_ block (6 variables) */
        cf_semicolon   cf_parenthese  cf_bracket
        cf_exclamation cf_dollar      cf_pound

        /* three summary measures */
        average
        longest
        total

        /* last field on each record */
        spam
    ;
run;
/*---------------------------------------------------------------*/
/* %hpforest - run PROC HPFOREST on WORK.SPAMBASE and capture    */
/*             its FitStatistics ODS table.                      */
/*                                                               */
/*   f=              value handed to the TRAINFRACTION= option   */
/*   output_suffix=  text appended to the generated names:       */
/*                     data set  fitstats_f<suffix>              */
/*                     column    fraction<suffix> (was Miscoob)  */
/*---------------------------------------------------------------*/
%macro hpforest(f=, output_suffix=);
    %local stats_ds oob_var;
    %let stats_ds = fitstats_f&output_suffix.;
    %let oob_var  = fraction&output_suffix.;

    proc hpforest data=spambase
                  maxtrees=200
                  vars_to_try=26
                  alpha=0.2
                  trainfraction=&f.;
        /* every variable whose name starts with w or c, plus the */
        /* three summary measures, is an interval input           */
        input w: c: average longest total / level=interval;
        target spam / level=binary;

        /* keep the fit statistics; rename Miscoob so that the    */
        /* results of several runs can sit side by side           */
        ods output FitStatistics=&stats_ds.(rename=(Miscoob=&oob_var.));
    run;
%mend hpforest;
/* Fit the forest three times, once per training fraction. Each call */
/* leaves a data set fitstats_f<suffix> holding a fraction<suffix>   */
/* column.                                                           */
%hpforest(f=0.8, output_suffix=08)
%hpforest(f=0.6, output_suffix=06)
%hpforest(f=0.4, output_suffix=04)

/* Put the three result sets side by side.                           */
/* There is no BY statement, so this is a one-to-one merge: rows are */
/* paired by position, and columns common to the inputs take their   */
/* value from the last data set listed.                              */
data fitstats(rename=(Ntrees=Trees));
    merge fitstats_f08
          fitstats_f06
          fitstats_f04;

    label fraction08 = "Fraction=0.8"
          fraction06 = "Fraction=0.6"
          fraction04 = "Fraction=0.4";
run;
/* Overlay the three fraction columns against the number of trees,   */
/* one line pattern per training fraction.                           */
title "Misclassification Rate for Various Fractions of Training Data";

proc sgplot data=fitstats;
    series x=Trees y=fraction08 /
           lineattrs=(Pattern=ShortDash Thickness=2);
    series x=Trees y=fraction06 /
           lineattrs=(Pattern=MediumDashDotDot Thickness=2);
    series x=Trees y=fraction04 /
           lineattrs=(Pattern=LongDash Thickness=2);

    yaxis label='OOB Misclassification Rate';
run;

/* clear the global title so it does not carry into later steps */
title;