SAS® High-Performance Analytics Samples
The SAS High-Performance Analytics sample programs and install verification tests can be run only after you
edit and submit this file.
The file contains site-specific information about your environment so that the procedures can run successfully.
HPFOREST Example (hpfrste3)
/***************************************************************/
/* */
/* S A S S A M P L E L I B R A R Y */
/* */
/* NAME: hpfrste3.sas */
/* TITLE: HPFOREST Example (hpfrste3) */
/* PRODUCT: HPA */
/* SYSTEM: */
/* KEYS: */
/* PROCS: HPFOREST */
/* DATA: UCI-Machine Learning Repository */
/* Spambase Data Set */
/* */
/* SUPPORT: UPDATE: */
/* REF: */
/* MISC: Spambase Example section of the */
/* HPFOREST chapter of HPA. */
/* Number of Variables to Try When Splitting a Node */
/* */
/***************************************************************/
/* Read the comma-delimited Spambase text file into the spambase data  */
/* set. Every column is read as numeric with list input, in file       */
/* order: 48 wf_* columns, 6 cf_* columns, three run-length summary    */
/* columns (average, longest, total) and finally the spam target.      */
/* Edit the INFILE path so that it points to your copy of the data.    */
data spambase;
   infile 'c:\spambase_data.txt' dlm=',';
   input
      /* wf_* columns */
      wf_make       wf_adress     wf_all        wf_3d
      wf_our        wf_over       wf_remove     wf_internet
      wf_order      wf_mail       wf_receive    wf_will
      wf_people     wf_report     wf_addresses  wf_free
      wf_business   wf_email      wf_you        wf_credit
      wf_your       wf_font       wf_000        wf_money
      wf_hp         wf_hpl        wf_george     wf_650
      wf_lab        wf_labs       wf_telnet     wf_857
      wf_data       wf_415        wf_85         wf_technology
      wf_1999       wf_parts      wf_pm         wf_direct
      wf_cs         wf_meeting    wf_original   wf_project
      wf_re         wf_edu        wf_table      wf_conference
      /* cf_* columns */
      cf_semicolon  cf_parenthese cf_bracket
      cf_exclamation cf_dollar    cf_pound
      /* summary columns */
      average       longest       total
      /* target */
      spam
   ;
run;
/* Fit one HPFOREST model to the spambase data set.                    */
/*   Vars= : value passed to the VARS_TO_TRY= option. It is also used  */
/*           as the suffix of the output table and column names.       */
/* The FitStatistics ODS table is saved as fitstats_vars<Vars>, with   */
/* its Miscoob column renamed to VarsToTry<Vars>.                      */
%macro hpforest(Vars=);
   proc hpforest
         data=spambase
         maxtrees=200
         alpha=0.2
         vars_to_try=&Vars.;
      /* w: and c: are name-prefix lists (all wf_* and cf_* columns) */
      input w: c: average longest total / level=interval;
      target spam / level=binary;
      ods output
         FitStatistics=fitstats_vars&Vars.(rename=(Miscoob=VarsToTry&Vars.));
   run;
%mend hpforest;
/* Fit the forest once for each VARS_TO_TRY setting being compared. */
/* Each call leaves behind a fitstats_vars<value> data set.         */
%hpforest(Vars=all);
%hpforest(Vars=40);
%hpforest(Vars=26);
%hpforest(Vars=7);
%hpforest(Vars=2);
/* Combine the five fit-statistics tables side by side (one-to-one     */
/* merge, no BY statement) so that each VarsToTry<value> column can be */
/* plotted against the number of trees. Ntrees is renamed to Trees in  */
/* the output data set.                                                */
data fitstats;
   merge fitstats_varsall
         fitstats_vars40
         fitstats_vars26
         fitstats_vars7
         fitstats_vars2;
   label VarsToTryAll = "Vars=All"
         VarsToTry40  = "Vars=40"
         VarsToTry26  = "Vars=26"
         VarsToTry7   = "Vars=7"
         VarsToTry2   = "Vars=2";
   rename Ntrees = Trees;
run;
/* Plot the VarsToTry<value> column of each fit against the number of  */
/* trees, one series per VARS_TO_TRY setting. Every series gets its    */
/* own line pattern so the curves stay distinguishable even when color */
/* is unavailable (for example, in monochrome output).                 */
proc sgplot data=fitstats;
   title "Misclassification Rate for Various VarsToTry Values";
   series x=Trees y=VarsToTryAll / lineattrs=(Color=black);
   series x=Trees y=VarsToTry40  / lineattrs=(Pattern=ShortDash Thickness=2);
   /* Was ShortDash, identical to the Vars=40 series; use a distinct pattern. */
   series x=Trees y=VarsToTry26  / lineattrs=(Pattern=Dot Thickness=2);
   series x=Trees y=VarsToTry7   / lineattrs=(Pattern=MediumDashDotDot Thickness=2);
   series x=Trees y=VarsToTry2   / lineattrs=(Pattern=LongDash Thickness=2);
   yaxis label='OOB Misclassification Rate';
run;
/* Clear the title so that it does not carry over to later output. */
title;