SAS® High-Performance Analytics Samples
The SAS High-Performance Analytics sample programs and install verification tests can be run only after you
edit and submit this file.
The file contains site-specific information about your environment so that the procedures can run successfully.
HPFOREST Example (hpfrste2)
/***************************************************************/
/* */
/* S A S   S A M P L E   L I B R A R Y */
/* */
/* NAME: hpfrste2.sas */
/* TITLE: HPFOREST Example (hpfrste2) */
/* PRODUCT: HPA */
/* SYSTEM: */
/* KEYS: */
/* PROCS: HPFOREST */
/* DATA: UCI-Machine Learning Repository */
/* Spambase Data Set */
/* */
/* SUPPORT: UPDATE: */
/* REF: */
/* MISC: Spambase Example section of the */
/* HPFOREST chapter of HPA. */
/* Out-Of-Bag Estimate Of Misclassification Rate */
/* */
/***************************************************************/
/* Build WORK.SPAMBASE from the comma-delimited text file below.        */
/* All 58 fields are read with list input as numeric variables, in the  */
/* order they appear on each record:                                    */
/*   48 wf_* variables, 6 cf_* variables, average/longest/total, spam.  */
/* NOTE(review): wf_/cf_ presumably stand for word/character frequency  */
/* per the UCI Spambase layout - confirm against the data set's names   */
/* file. The spelling wf_adress is kept as-is (it is a column name).    */
/* The INFILE path is site-specific; edit it to point at your copy.     */
data spambase;
   infile 'c:\spambase_data.txt' dlm=',';
   input
      /* wf_* block (48 variables) */
      wf_make wf_adress wf_all wf_3d wf_our wf_over
      wf_remove wf_internet wf_order wf_mail wf_receive wf_will
      wf_people wf_report wf_addresses wf_free wf_business wf_email
      wf_you wf_credit wf_your wf_font wf_000 wf_money
      wf_hp wf_hpl wf_george wf_650 wf_lab wf_labs
      wf_telnet wf_857 wf_data wf_415 wf_85 wf_technology
      wf_1999 wf_parts wf_pm wf_direct wf_cs wf_meeting
      wf_original wf_project wf_re wf_edu wf_table wf_conference
      /* cf_* block (6 variables) */
      cf_semicolon cf_parenthese cf_bracket
      cf_exclamation cf_dollar cf_pound
      /* three summary measures, then the target */
      average longest total
      spam
   ;
run;
/* Fit a forest of up to 200 trees on WORK.SPAMBASE with SPAM as a      */
/* binary target. The prefix lists w: and c: select every variable      */
/* whose name starts with "w" or "c" (the wf_* and cf_* columns), and   */
/* all inputs are declared interval-level.                              */
/* The FitStatistics ODS table is saved as WORK.FITSTATS, with its      */
/* Ntrees column renamed to Trees for the plot that follows.            */
proc hpforest
      data     = spambase
      maxtrees = 200
      alpha    = 0.2;
   input  w: c: average longest total / level=interval;
   target spam / level=binary;
   ods output FitStatistics = fitstats(rename=(Ntrees=Trees));
run;
/* Rewrite WORK.FITSTATS in place, attaching display labels to the      */
/* tree-count column and the two misclassification-rate columns so the  */
/* plot legend and axis show readable text instead of variable names.   */
data fitstats;
   set fitstats;
   label
      Trees   = 'Number of Trees'
      MiscAll = 'Full Data'
      Miscoob = 'OOB'
   ;
run;
/* Plot both misclassification-rate columns of WORK.FITSTATS against    */
/* the number of trees. MiscAll uses the default line; MiscOob is drawn */
/* as a thicker short-dashed line so the two series are easy to tell    */
/* apart. TITLE is a global statement, so it is set before the step and */
/* cleared again afterwards.                                            */
title "OOB vs Training";
proc sgplot data=fitstats;
   series x=Trees y=MiscAll;
   series x=Trees y=MiscOob /
      lineattrs=(pattern=shortdash thickness=2);
   yaxis label='Misclassification Rate';
run;
title;