/***************************************************************/
/*                                                             */
/*          S A S   S A M P L E   L I B R A R Y                */
/*                                                             */
/*    NAME: hpfrste4.sas                                       */
/*   TITLE: HPFOREST Example (hpfrste4)                        */
/* PRODUCT: HPA                                                */
/*  SYSTEM:                                                    */
/*    KEYS:                                                    */
/*   PROCS: HPFOREST                                           */
/*    DATA: UCI-Machine Learning Repository                    */
/*          Spambase Data Set                                  */
/*                                                             */
/* SUPPORT:                             UPDATE:                */
/*     REF:                                                    */
/*    MISC: Spambase Example section of the                    */
/*          HPFOREST chapter of HPA.                           */
/*          Fraction of Training Data To Train A Tree          */
/*                                                             */
/***************************************************************/

/*-------------------------------------------------------------*/
/* Step 1: read the comma-delimited Spambase text file into    */
/* WORK.SPAMBASE. Each record supplies 58 numeric fields:      */
/* 48 wf_* columns, 6 cf_* columns, average, longest, total,   */
/* and spam (used below as the binary target).                 */
/*-------------------------------------------------------------*/
data spambase;
   infile 'c:\spambase_data.txt' delimiter = ',';
   input wf_make       wf_adress     wf_all        wf_3d
         wf_our        wf_over       wf_remove     wf_internet
         wf_order      wf_mail       wf_receive    wf_will
         wf_people     wf_report     wf_addresses  wf_free
         wf_business   wf_email      wf_you        wf_credit
         wf_your       wf_font       wf_000        wf_money
         wf_hp         wf_hpl        wf_george     wf_650
         wf_lab        wf_labs       wf_telnet     wf_857
         wf_data       wf_415        wf_85         wf_technology
         wf_1999       wf_parts      wf_pm         wf_direct
         wf_cs         wf_meeting    wf_original   wf_project
         wf_re         wf_edu        wf_table      wf_conference
         cf_semicolon  cf_parenthese cf_bracket    cf_exclamation
         cf_dollar     cf_pound
         average       longest       total
         spam;
run;

/*-------------------------------------------------------------*/
/* %hpforest(f=, output_suffix=)                               */
/*                                                             */
/* Runs PROC HPFOREST on WORK.SPAMBASE with TRAINFRACTION=&f   */
/* and captures the FitStatistics ODS table.                   */
/*                                                             */
/*   f             value passed to TRAINFRACTION= (the         */
/*                 fraction of training data used to train a   */
/*                 tree).                                      */
/*   output_suffix text appended to the output names: the      */
/*                 table is saved as fitstats_f<suffix> and    */
/*                 its Miscoob column is renamed to            */
/*                 fraction<suffix>.                           */
/*-------------------------------------------------------------*/
%macro hpforest(f=, output_suffix=);
   proc hpforest data=spambase alpha = 0.2 maxtrees=200 vars_to_try=26
                 trainfraction=&f;
      /* every wf_* (w:) and cf_* (c:) column plus the three   */
      /* remaining inputs are interval inputs                  */
      input w: c: average longest total/level=interval;
      target spam/level=binary;
      /* the trailing period ends the macro variable reference */
      /* so the data set option list can follow directly       */
      ods output
         FitStatistics = fitstats_f&output_suffix.(rename=(Miscoob=fraction&output_suffix.));
   run;
%mend hpforest;

/* One forest per training fraction; the suffix keeps outputs apart. */
%hpforest(f=0.8, output_suffix=08);
%hpforest(f=0.6, output_suffix=06);
%hpforest(f=0.4, output_suffix=04);

/*-------------------------------------------------------------*/
/* Step 3: combine the three fit-statistics tables so that the */
/* fraction08 / fraction06 / fraction04 columns sit side by    */
/* side for plotting.                                          */
/*                                                             */
/* The merge is keyed on Ntrees instead of relying on row      */
/* position: a MERGE without BY pairs observations purely by   */
/* row number and is rejected or flagged when the MERGENOBY    */
/* system option is set to ERROR or WARN.                      */
/* NOTE(review): assumes each FitStatistics table is in        */
/* ascending Ntrees order with one row per value - confirm     */
/* against the PROC HPFOREST output.                           */
/*                                                             */
/* Columns that share a name across the three inputs (all but  */
/* the renamed Miscoob columns) take their value from the last */
/* data set listed, fitstats_f04.                              */
/*-------------------------------------------------------------*/
data fitstats;
   merge fitstats_f08 fitstats_f06 fitstats_f04;
   by Ntrees;
   /* RENAME takes effect on output, so BY still refers to Ntrees */
   rename Ntrees=Trees;
   label fraction08 = "Fraction=0.8";
   label fraction06 = "Fraction=0.6";
   label fraction04 = "Fraction=0.4";
run;

/*-------------------------------------------------------------*/
/* Step 4: plot the three renamed Miscoob columns against the  */
/* number of trees, one series per training fraction, each     */
/* with its own line pattern.                                  */
/*-------------------------------------------------------------*/
proc sgplot data=fitstats;
   title "Misclassification Rate for Various Fractions of Training Data";
   series x=Trees y=fraction08/lineattrs=(Pattern=ShortDash Thickness=2);
   series x=Trees y=fraction06/lineattrs=(Pattern=MediumDashDotDot Thickness=2);
   series x=Trees y=fraction04/lineattrs=(Pattern=LongDash Thickness=2);
   yaxis label='OOB Misclassification Rate';
run;

/* clear the title so it does not carry over to later output */
title;