/***************************************************************/
/*                                                             */
/*          S A S   S A M P L E   L I B R A R Y                */
/*                                                             */
/*    NAME: hpfrste3.sas                                       */
/*   TITLE: HPFOREST Example (hpfrste3)                        */
/* PRODUCT: HPA                                                */
/*  SYSTEM:                                                    */
/*    KEYS:                                                    */
/*   PROCS: HPFOREST                                           */
/*    DATA: UCI-Machine Learning Repository                    */
/*          Spambase Data Set                                  */
/*                                                             */
/* SUPPORT:                             UPDATE:                */
/*     REF:                                                    */
/*    MISC: Spambase Example section of the                    */
/*          HPFOREST chapter of HPA.                           */
/*          Number of Variables to Try When Splitting a Node   */
/*                                                             */
/***************************************************************/

/*-------------------------------------------------------------*/
/* Step 1: read the comma-delimited Spambase file.             */
/* Each record supplies 58 numeric values, in this order:      */
/*   48 wf_* variables, 6 cf_* variables, then average,        */
/*   longest, total, and the target spam.                      */
/* The path is hard-coded; adjust it to the local copy.        */
/*-------------------------------------------------------------*/
data spambase;
   infile 'c:\spambase_data.txt' delimiter = ',';
   input wf_make       wf_adress     wf_all        wf_3d
         wf_our        wf_over       wf_remove     wf_internet
         wf_order      wf_mail       wf_receive    wf_will
         wf_people     wf_report     wf_addresses  wf_free
         wf_business   wf_email      wf_you        wf_credit
         wf_your       wf_font       wf_000        wf_money
         wf_hp         wf_hpl        wf_george     wf_650
         wf_lab        wf_labs       wf_telnet     wf_857
         wf_data       wf_415        wf_85         wf_technology
         wf_1999       wf_parts      wf_pm         wf_direct
         wf_cs         wf_meeting    wf_original   wf_project
         wf_re         wf_edu        wf_table      wf_conference
         cf_semicolon  cf_parenthese cf_bracket    cf_exclamation
         cf_dollar     cf_pound
         average       longest       total
         spam;
run;

/*-------------------------------------------------------------*/
/* Step 2: %hpforest(Vars=)                                    */
/* Runs PROC HPFOREST on SPAMBASE with VARS_TO_TRY=&Vars and   */
/* saves the FitStatistics ODS table as FITSTATS_VARS&Vars.    */
/* The Miscoob column is renamed to VarsToTry&Vars so that the */
/* results of several runs can sit side by side in one table.  */
/*                                                             */
/*   Vars = value passed to the VARS_TO_TRY= option            */
/*          (a number, or ALL)                                 */
/*-------------------------------------------------------------*/
%macro hpforest(Vars=);
   proc hpforest data=spambase alpha = 0.2 maxtrees=200 vars_to_try=&Vars.;
      /* w: and c: are name-prefix lists: every variable whose name */
      /* starts with w (the wf_ inputs) or with c (the cf_ inputs). */
      input w: c: average longest total/level=interval;
      target spam/level=binary;
      ods output
         FitStatistics = fitstats_vars&Vars.(rename=(Miscoob=VarsToTry&Vars.));
   run;
%mend hpforest;

/* One forest per candidate VARS_TO_TRY value. */
%hpforest(vars=all);
%hpforest(vars=40);
%hpforest(vars=26);
%hpforest(vars=7);
%hpforest(vars=2);

/*-------------------------------------------------------------*/
/* Step 3: combine the five fit-statistics tables.             */
/* There is no BY statement, so this is a one-to-one merge:    */
/* observations are matched by position. Columns that share a  */
/* name across inputs take their value from the last data set  */
/* listed; only the renamed VarsToTry* columns are distinct.   */
/*-------------------------------------------------------------*/
data fitstats;
   merge fitstats_varsall
         fitstats_vars40
         fitstats_vars26
         fitstats_vars7
         fitstats_vars2;
   rename Ntrees=Trees;
   label VarsToTryAll = "Vars=All";
   label VarsToTry40  = "Vars=40";
   label VarsToTry26  = "Vars=26";
   label VarsToTry7   = "Vars=7";
   label VarsToTry2   = "Vars=2";
run;

/*-------------------------------------------------------------*/
/* Step 4: plot each VarsToTry* column against the number of   */
/* trees. Every series gets its own line pattern so the curves */
/* remain distinguishable without relying on color (the        */
/* Vars=40 and Vars=26 series previously shared ShortDash).    */
/*-------------------------------------------------------------*/
proc sgplot data=fitstats;
   title "Misclassification Rate for Various VarsToTry Values";
   series x=Trees y=VarsToTryAll/lineattrs=(Color=black);
   series x=Trees y=VarsToTry40/lineattrs=(Pattern=ShortDash Thickness=2);
   series x=Trees y=VarsToTry26/lineattrs=(Pattern=MediumDash Thickness=2);
   series x=Trees y=VarsToTry7/lineattrs=(Pattern=MediumDashDotDot Thickness=2);
   series x=Trees y=VarsToTry2/lineattrs=(Pattern=LongDash Thickness=2);
   yaxis label='OOB Misclassification Rate';
run;

/* Clear the title so it does not carry over to later output. */
title;