%macro Vuong(version, data=_last_, response=, freq=, weight=, model1=, p1=, dist1=, model2=, p2=, dist2=, nparm1=, nparm2=, test=both, pzero1=, pzero2=, event=, trials=, scale1=, scale2= ); %let _version=1.1; %if &version ne %then %put VUONG macro Version &_version; %if &data=_last_ %then %let data=&syslast; %let _opts = %sysfunc(getoption(notes)) _last_=%sysfunc(getoption(_last_)); %if &version ne debug %then %str(options nonotes;); /* Check for newer version */ %if %sysevalf(&sysver >= 8.2) %then %do; %let _notfound=0; filename _ver url 'http://ftp.sas.com/techsup/download/stat/versions.dat' termstr=crlf; data _null_; infile _ver end=_eof; input name:$15. ver; if upcase(name)="&sysmacroname" then do; call symput("_newver",ver); stop; end; if _eof then call symput("_notfound",1); run; %if &syserr ne 0 or &_notfound=1 %then %put &sysmacroname: Unable to check for newer version; %else %if %sysevalf(&_newver > &_version) %then %do; %put &sysmacroname: A newer version of the &sysmacroname macro is available.; %put %str( ) You can get the newer version at this location:; %put %str( ) http://support.sas.com/ctx/samples/index.jsp; %end; %end; /* Check arguments. Initialize. */ %if &model1= %then %let model1=Model 1; %if &model2= %then %let model2=Model 2; %let dist1=%upcase(&dist1); %let dist2=%upcase(&dist2); %let test=%upcase(%substr(&test,1,1)); %if &freq= %then %let f=1; %else %let f=&freq; %if &weight= %then %let w=1; %else %let w=&weight; %let calc=0; /* DATA= must be specified and data set must exist */ %if &data= or %sysfunc(exist(&data)) ne 1 %then %do; %put ERROR: DATA= data set not specified or not found.; %goto exit; %end; /* RESPONSE= must be specified */ %if &response= %then %do; %put ERROR: The RESPONSE= parameter is required.; %goto exit; %end; /* TEST= must be valid */ %if &test ne B and &test ne V and &test ne C and &test ne L %then %do; %put ERROR: TEST= must be VUONG, CLARK, BOTH, or LR.; %goto exit; %end; /* If LR test, DIST1 must equal DIST2 */ %if &test=L and &dist1 ne &dist2 %then %do; %put ERROR: Likelihood ratio test requires same distribution for both models.; %goto exit; %end; /* If LR test, NPARM1 and NPARM2 must be specified */ %if &test=L and (&nparm1= or &nparm2=) %then %do; %put ERROR: NPARM1= and NPARM2= must be specified when TEST=LR.; %goto exit; %end; %do i=1 %to 2; /* P1=, P2= must be specified */ %if &&p&i= %then %do; %put ERROR: The P&i= parameter is required.; %goto exit; %end; /* Check for valid DIST1=, DIST2= values */ %if &&dist&i ne NOR and &&dist&i ne BIN and &&dist&i ne MULT and &&dist&i ne OTH and &&dist&i ne GAM and &&dist&i ne IG and &&dist&i ne NB and &&dist&i ne POI and &&dist&i ne ZIP and &&dist&i ne ZINB %then %do; %put ERROR: DIST&i= must be NOR, BIN, MULT, GAM, IG, NB, POI, ZIP, ZINB, or OTH.; %goto exit; %end; /* Must specify SCALE= if NOR, NB, ZINB, GAM, or IG */ %if (&&dist&i=NOR or &&dist&i=NB or &&dist&i=ZINB or &&dist&i=GAM or &&dist&i=IG) and &&scale&i= %then %do; %put ERROR: SCALE&i= must be specified with this distribution.; %goto exit; %end; /* Must specify PZERO= with ZIP or ZINB */ %if (&&dist&i=ZIP or &&dist&i=ZINB) and &&pzero&i= %then %do; %put ERROR: PZERO&i= must be specified with this distribution.; %goto exit; %end; %end; /* Must specify either EVENT= or TRIALS= with BIN */ %if (&dist1=BIN or &dist2=BIN) and &trials= and &event= %then %do; %put ERROR: For Binomial distribution, specify either EVENT= or TRIALS=.; %goto exit; %end; %let dsid=%sysfunc(open(&data)); %if &dsid %then %do; %if %sysfunc(varnum(&dsid,%upcase(&response)))=0 %then %do; %put ERROR: RESPONSE= variable &response not found.; %let rc=%sysfunc(close(&dsid)); %goto exit; %end; %if %sysfunc(varnum(&dsid,%upcase(&p1)))=0 %then %do; %put ERROR: P1= variable &p1 not found.; %let rc=%sysfunc(close(&dsid)); %goto exit; %end; %if %sysfunc(varnum(&dsid,%upcase(&p2)))=0 %then %do; %put ERROR: P2= variable &p2 not found.; %let rc=%sysfunc(close(&dsid)); %goto exit; %end; %if &pzero1 ne %then %do; %if %sysfunc(varnum(&dsid,%upcase(&pzero1)))=0 %then %do; %put ERROR: PZERO1= variable &pzero1 not found.; %let rc=%sysfunc(close(&dsid)); %goto exit; %end; %end; %if &pzero2 ne %then %do; %if %sysfunc(varnum(&dsid,%upcase(&pzero2)))=0 %then %do; %put ERROR: PZERO2= variable &pzero2 not found.; %let rc=%sysfunc(close(&dsid)); %goto exit; %end; %end; %if &trials ne %then %do; %if %sysfunc(varnum(&dsid,%upcase(&trials)))=0 %then %do; %put ERROR: TRIALS= variable &trials not found.; %let rc=%sysfunc(close(&dsid)); %goto exit; %end; %end; %if &freq ne %then %do; %if %sysfunc(varnum(&dsid,%upcase(&freq)))=0 %then %do; %put ERROR: FREQ= variable &freq not found.; %let rc=%sysfunc(close(&dsid)); %goto exit; %end; %end; %if &weight ne %then %do; %if %sysfunc(varnum(&dsid,%upcase(&weight)))=0 %then %do; %put ERROR: WEIGHT= variable &weight not found.; %let rc=%sysfunc(close(&dsid)); %goto exit; %end; %end; %let rc=%sysfunc(close(&dsid)); %end; %else %do; %put ERROR: Could not open DATA= data set.; %goto exit; %end; /* Verify response is binary if DIST1 or DIST2 is BIN and not events/trials */ %if (&dist1=BIN or &dist2=BIN) and &trials= %then %do; ods exclude all; ods output nlevels=_nlvls; proc freq data=&data nlevels; where missing(&response) ne 1; table &response; run; ods select all; data _null_; set _nlvls; call symput ("nlvls",nlevels); run; %if &nlvls ne 2 %then %do; %put ERROR: Binomial response variable, &response, must have exactly two levels.; %goto exit; %end; %end; proc sql; reset noprint; /* If have only one probability in an observation, then response data is not the same for the two models and tests cannot be done. */ select sum( ((&p1 is missing) and (&p2 is not missing)) or ((&p1 is not missing) and (&p2 is missing)) ) as _chk into :chk from &data; /* Compute probability of response for specified distribution for each model */ create table _m(drop=_n _BINresp) as select count(&p1) as _n, %if &freq ne %then sum(&freq) as _sumf,; %else calculated _n as _sumf,; %if (&dist1=BIN or &dist2=BIN) and &trials= %then %do; (&response = &event) as _BINresp, %let trials=1; %let calc=1; %end; %else &response as _BINresp,; %do _i=1 %to 2; &f*&w* %if &&dist&_i=ZIP %then log(pdf("poisson",&response,&&p&_i/(1-&&pzero&_i))* (1-&&pzero&_i)+(&response=0)*&&pzero&_i); %else %if &&dist&_i=POI %then logpdf("poisson",&response,&&p&_i); %else %if &&dist&_i=ZINB %then log(pdf("negbinomial", &response, 1/(1+&&scale&_i*&&p&_i/(1-&&pzero&_i)), 1/&&scale&_i) * (1-&&pzero&_i)+(&response=0)*&&pzero&_i); %else %if &&dist&_i=NB %then logpdf("negbinomial",&response,1/(1+&&scale&_i*&&p&_i),1/&&scale&_i); %else %if &&dist&_i=BIN %then %do; logpdf("binomial", %if &calc %then calculated; _BINresp,&&p&_i,&trials) %end; %else %if &&dist&_i=NOR %then logpdf("normal",&response,&&p&_i,&&scale&_i); %else %if &&dist&_i=GAM %then logpdf("gamma",&response,&&scale&_i,&&p&_i/&&scale&_i); %else %if &&dist&_i=IG %then log(1/&&p&_i) + logpdf("igauss",&response/&&p&_i,1/(&&p&_i*&&scale&_i**2)); %else %if &&dist&_i=MULT or &&dist&_i=OTH %then log(&&p&_i); as _l&_i, %end; /* Compute log probability ratio and Akaike and Schwarz adjustments */ (calculated _l1) - (calculated _l2) as _m %if &nparm1 ne and &nparm2 ne %then %do; , (calculated _l1 - &nparm1/(calculated _n)) - (calculated _l2 - &nparm2/(calculated _n)) as _mA, (calculated _l1 - (log(calculated _sumf)*&nparm1)/(2*(calculated _n))) - (calculated _l2 - (log(calculated _sumf)*&nparm2)/(2*(calculated _n))) as _mS %end; from &data; %if &chk=1 %then %do; %put ERROR: Predicted values from both models must exist in every observation.; %goto exit; %end; /* Compute components of Vuong statistics and Clarke statistics and p-values */ ods exclude all; proc univariate data=_m vardef=n; %if &nparm1 ne and &nparm2 ne %then %do; var _m _mA _mS _l1 _l2; output out=_VC sum=sumM sumMA sumMS LL1 LL2 n=n std=stdm msign=signM signMA signMS probm=prsignM prsignMA prsignMS; %end; %else %do; var _m _l1 _l2; output out=_VC sum=sumM LL1 LL2 n=n std=stdm msign=signM probm=prsignM; %end; run; /* Compute Vuong statistics and p-values */ data _VC; set _VC; length type $16 PrefModC $40 PrefModV $40; %if &test ne L %then %do; type="Unadjusted"; Z = summ/(sqrt(n)*stdm); PrZ = 2*(1-probnorm(abs(Z))); if Z>0 then PrefModV="&model1"; if Z<0 then PrefModV="&model2"; sign=signM; prsign=prsignM; if sign>0 then PrefModC="&model1"; if sign<0 then PrefModC="&model2"; output; %if &nparm1 ne and &nparm2 ne %then %do; type="Akaike Adjusted"; Z = sumMA/(sqrt(n)*stdm); PrZ = 2*(1-probnorm(abs(Z))); if Z>0 then PrefModV="&model1"; if Z<0 then PrefModV="&model2"; sign=signMA; prsign=prsignMA; if sign>0 then PrefModC="&model1"; if sign<0 then PrefModC="&model2"; output; type="Schwarz Adjusted"; Z = sumMS/(sqrt(n)*stdm); PrZ = 2*(1-probnorm(abs(Z))); if Z>0 then PrefModV="&model1"; if Z<0 then PrefModV="&model2"; sign=signMS; prsign=prsignMS; if sign>0 then PrefModC="&model1"; if sign<0 then PrefModC="&model2"; output; %end; %end; %if &test=L %then %do; type="Likelihood Ratio"; ChiSq=2*abs(sumM); DF=abs(&nparm1-&nparm2); PrChi=1-probchi(ChiSq,DF); %end; run; ods select all; /* Display header table with data and model information */ title "The Vuong Macro"; title2 " "; data _head; if _n_=1 then set _VC; length item $40 value $40; item="Data Set"; value="&data"; output; item="Response"; value="&response"; output; %if &dist1=BIN %then %do; %if &event ne %then %do; item="Event Level"; value=cats(&event); output; %end; %if &trials ne %then %do; item="Number of Trials"; value="&trials"; output; %end; %end; %if &freq ne %then %do; item="Frequency Variable"; value="&freq"; output; %end; %if &weight ne %then %do; item="Weight Variable"; value="&weight"; output; %end; item="Number of Observations Used"; value=cats(n); output; item="Model 1"; value="&model1"; output; item=" Distribution"; value="&dist1"; output; item=" Predicted Variable"; value="&p1"; output; %if &nparm1 ne %then %do; item=" Number of Parameters"; value="&nparm1"; output; %end; %if &scale1 ne %then %do; item=" Scale"; value="&scale1"; output; %end; %if &pzero1 ne %then %do; item=" Zero-inflation Probability"; value="&pzero1"; output; %end; item=" Log Likelihood"; value=cats(put(LL1,best10.)); output; item="Model 2"; value="&model2"; output; item=" Distribution"; value="&dist2"; output; item=" Predicted Variable"; value="&p2"; output; %if &nparm2 ne %then %do; item=" Number of Parameters"; value="&nparm2"; output; %end; %if &scale2 ne %then %do; item=" Scale"; value="&scale2"; output; %end; %if &pzero2 ne %then %do; item=" Zero-inflation Probability"; value="&pzero2"; output; %end; item=" Log Likelihood"; value=cats(put(LL2,best10.)); output; stop; run; proc print noobs data=_head label; var item value; label item='00'x value='00'x; title3 "Model Information"; run; /* Display unadjusted and adjusted statistics and p-values */ %if &test=L %then %do; proc print data=_VC noobs label; var type ChiSq DF PrChi; format ChiSq 8.4 PrChi pvalue6.; label type="Test" ChiSq="Chi-Square" PrChi="Pr>Chi-Square"; title3 "Likelihood Ratio Test"; title4 "H0: models are equivalent"; title5 "Ha: models differ"; run; %end; %if &test=B or &test=V %then %do; proc print data=_VC noobs label; var type Z PrZ PrefModV; format Z 8.4 PrZ pvalue6.; label type="Vuong Statistic" PrZ="Pr>|Z|" PrefModV="Preferred Model"; title3 "Vuong Test"; title4 "H0: models are equally close to the true model"; title5 "Ha: one of the models is closer to the true model"; run; %end; %if &test=B or &test=C %then %do; proc print data=_VC noobs label; var type sign prsign PrefModC; format sign 8.4 prsign pvalue6.; label type="Clarke Statistic" sign="M" prsign="Pr>=|M|" PrefModC="Preferred Model"; title3 "Clarke Sign Test"; title4 "H0: models are equally close to the true model"; title5 "Ha: one of the models is closer to the true model"; run; %end; %exit: options &_opts; title; %mend;