options noovp linesize=75;
title 'Heckman Two-Step Selection Correction Estimation';

/******************************************************************/
/* Heckman two-step ("heckit") estimator                          */
/*                                                                */
/*  1. Probit of the selection indicator on the selection         */
/*     regressors (all observations).                             */
/*  2. OLS of the outcome on the outcome regressors plus the      */
/*     inverse Mills ratio (selected observations only).          */
/*  3. PROC IML computes the selection-corrected residual         */
/*     standard error, rho, and the consistent asymptotic         */
/*     covariance matrix of the second-stage coefficients.        */
/******************************************************************/

/******************************************************************/
/* Macros                                                         */
/*                                                                */
/* The macros below hold the variable names and selection codes,  */
/* so that none of the estimation code that follows has to be     */
/* modified.                                                      */
/******************************************************************/

/* slct: value of the selection variable that indicates the       */
/*       "selected" sample.  Usually this is "1", as below,       */
/*       but you may want to modify it.                           */
%macro slct;
   1
%mend slct;

/* nonslct: value of the selection variable that indicates the    */
/*          "unselected" sample.  Usually this is "0", as below,  */
/*          but you may want to modify it.                        */
%macro nonslct;
   0
%mend nonslct;

/* prbtlhs: dependent variable for first stage (probit)           */
/*          replace "sel" below with your variable                */
%macro prbtlhs;
   sel
%mend prbtlhs;

/* prbtrhs: independent variables for first stage (probit)        */
/*          replace "x1" below with your variable(s)              */
/*          you can use more than one line for them               */
/*          (list individual names: the list is also used as a    */
/*          matrix of names inside PROC IML)                      */
%macro prbtrhs;
   x1
%mend prbtrhs;

/* olslhs: dependent variable for second stage (ols)              */
/*         replace "dep" below with your variable                 */
%macro olslhs;
   dep
%mend olslhs;

/* olsrhs: independent variables for second stage (ols)           */
/*         replace "x2" below with your variable(s)               */
/*         you can use more than one line for them                */
/*         (list individual names: the list is also used as a     */
/*         matrix of names inside PROC IML)                       */
%macro olsrhs;
   x2
%mend olsrhs;

/* Modify this DATA step to access your data.                     */
/* Do _not_ change the name of dataset a.                         */
/* The KEEP option keeps only variables that will be used in      */
/* the estimation procedure; it may be deleted if so desired.     */
data a(keep=%prbtlhs %prbtrhs %olslhs %olsrhs);
   /* example data; if you have already created a SAS data set    */
   /* you do not need an INFILE                                   */
   infile '~/heckit/data';
   /* if you already have a SAS data set you will need a SET      */
   /* statement instead of INPUT                                  */
   input dep sel x1 x2;
run;

/* Print out descriptive statistics of all variables in           */
/* dataset a.  This is a good habit to get into.                  */
/* This procedure can be deleted if so desired.                   */
title2 'Means of All Variables Used in Estimation';
proc means data=a;
run;

/***************************************************************/
/* You should not need to modify anything below this point     */
/***************************************************************/

/* need to sort data to get coefficients with right signs */
proc sort data=a;
   by descending %prbtlhs;
run;

/***** First Stage: Probit *****/
/* note use of order=data to get coefficients with right signs  */
/* will save predicted gammaw's to dataset imr for calculation  */
/* of inverse mills ratio; note that all variables from dataset */
/* a will also be saved in dataset imr                          */
proc probit data=a order=data;
   class %prbtlhs;
   model %prbtlhs=%prbtrhs / covb;
   output out=imr xbeta=gammaw;
   title2 'First Stage: Probit Estimates of Selection';
run;

/* Next we create the Inverse Mills Ratio, as well as some       */
/* variables needed to calculate the Var-Cov Matrix of the       */
/* Probit Estimates and of the OLS Estimates.                    */
/*                                                               */
/* Output datasets:                                              */
/*   x, h             - every observation usable by the probit   */
/*   w, xstar, delta, - selected observations that are also      */
/*   b                  complete for the second stage            */
data x(keep=intercep %prbtrhs)      /* variables for both x and w should */
     w(keep=intercep %prbtrhs)      /* be the same                       */
     xstar(keep=intercep %olsrhs lambda)
     delta(keep=delta)
     h(keep=h)
     b(keep=%olslhs %olsrhs lambda);
   /* the retain below just gets the variables in proper order */
   retain intercep %prbtrhs %olsrhs %olslhs;
   set imr;

   /* create inverse mills ratio: phi(g)/Phi(g) for the selected   */
   /* sample and -phi(g)/(1-Phi(g)) for the unselected sample.     */
   /* 1-Phi(g) is evaluated as Phi(-g): the form Phi(g)-1 cancels  */
   /* to exactly zero for large g and would divide by zero.        */
   if gammaw = . then lambda=.;
   else if (%prbtlhs eq %slct) then
      lambda=(1/sqrt(2*3.141592654)*exp(-1*gammaw**2/2))/probnorm(gammaw);
   else if (%prbtlhs eq %nonslct) then
      lambda=-(1/sqrt(2*3.141592654)*exp(-1*gammaw**2/2))/probnorm(-gammaw);
   else lambda=.;

   /* An observation without a usable lambda (selection variable or */
   /* a probit regressor missing) contributes nothing to either     */
   /* stage; writing it out would put missing values into the       */
   /* cross-product matrices built in PROC IML.                     */
   if lambda = . then delete;

   /* create intercep for use in cross-product matrices */
   intercep=1;

   /* create h for estimating asy. var-cov matrix of probit coefficients */
   h=lambda**2+lambda*gammaw;

   /* create delta for estimating asy. var-cov matrix of ols      */
   /* coefficients; this is a little redundant, but makes the     */
   /* notation easier to follow                                   */
   delta=h;

   /* output datasets with selected observations for calculating  */
   /* OLS standard errors.  Rows with a missing second-stage      */
   /* variable are skipped so that delta, w and xstar stay        */
   /* row-aligned with the sample PROC REG can actually use.      */
   if (%prbtlhs eq %slct) and nmiss(of %olslhs %olsrhs) = 0 then do;
      output delta;
      output w;
      output xstar;
      output b;
   end;

   /* output datasets with all observations for calculating       */
   /* probit standard errors                                      */
   output x;
   output h;
run;

/***** Second Stage: OLS *****/
/* Run only on selected sample                                    */
/* Note that selection is done in the above DATA step;            */
/* we could have also done it here with a WHERE clause            */
proc reg data=b outest=olsest;
   model %olslhs=%olsrhs lambda;
   output out=err residual=e;
   title2 'Second Stage: OLS Estimates of Model';
run;

/***** Estimate Consistent Standard Errors of OLS Stage *****/
title2 'Consistent Estimates of Standard Errors for Second Stage (OLS)';
proc iml;

   /* First, calculate asymptotic variance-covariance matrix of the
      probit estimates from the saved regressors and h.
      Be sure to check these estimates against those produced
      by the probit procedure above.
      See Greene, Econometric Analysis, pp. 677-678 for formulae. */

   /* Columns are read by name so that their order always matches
      the row/column labels used when printing. */
   use x;
   read all var{intercep %prbtrhs} into x;
   use h;
   read all var{h} into h;

   /* sum over i of h[i]*x[i,]`*x[i,], computed as one cross-product:
      x#h scales row i of x by h[i] */
   invsig=x`*(x#h);
   sig=inv(invsig);

   prbtnm={INTERCEP %prbtrhs};
   print,"Asymptotic Variance-Covariance Matrix",
         "of First Stage (Probit) Coefficients",
         sig[r=prbtnm c=prbtnm format=12.6];

   free x h invsig;

   /* Now estimate the selection-corrected standard error for the
      Second Stage (OLS) */

   /* Get estimate of coefficient on lambda from olsest, the dataset
      containing the ols estimates; the coefficient is read by name,
      so we do not need to keep track of its position in the beta
      vector. */
   use olsest;
   read all var{lambda} into theta;

   /* deltas */
   use delta var{delta};
   read all var{delta} into delta;
   deltabar=sum(delta)/nrow(delta);

   /* residuals */
   use err var{e};
   read all var{e} into e;

   /* calculate adjusted standard error */
   sigsqe=e`*e/nrow(e)+theta**2*deltabar;
   sige=sqrt(sigsqe);
   print,"Standard Error of Second Stage (OLS)",
         "Corrected for Selection",
         sige[format=12.4];

   numrowe=nrow(e);
   free e;

   /* calculate rho squared; rho carries the sign of theta.
      theta/sige equals sign(theta)*sqrt(rhosq) and is also
      defined when theta is exactly zero. */
   rhosq=theta**2/sigsqe;
   rho=theta/sige;
   print,"Correlation of Disturbance in Regression",
         "and Selection Criterion (Rho)",rho[format=8.4];

   /* regressor matrices for the selected sample, columns read by
      name so they line up with olsnm and with coeff below */
   use xstar;
   read all var{intercep %olsrhs lambda} into xstar;
   use w;
   read all var{intercep %prbtrhs} into w;

   /* Calculate Consistent Standard Errors
      See Greene, Econometric Analysis, pp. 744-747 for formulae */

   /**** Version 1.3 (January 1993) ****/
   /* cdeltaw=capdelta*w, cdeltaxs=capdelta*xstar              */
   /* where capdelta=diag(delta).  capdelta is n x n, whereas  */
   /* cdeltaw is n x ncol(w) and cdeltaxs is n x ncol(xstar).  */
   /* Scaling the rows directly reduces memory use.            */
   /***********************************************/
   cdeltaw=w#delta;
   cdeltaxs=xstar#delta;

   Q=rhosq*(xstar`*cdeltaw)*sig*(w`*cdeltaxs);

   /**** Version 1.3 (January 1993) ****/
   /* Irsdltxs=(ident(nrow(capdelta))-rhosq*capdelta)*xstar     */
   /* again, this is an n x ncol(xstar) matrix, rather than     */
   /* needing capdelta, which is n x n.                         */
   /**********************************************************/
   Irhosqd=1-rhosq*delta;
   free delta;
   Irsdltxs=xstar#Irhosqd;
   free Irhosqd;

   xsxsinv=inv(xstar`*xstar);
   asyvcov=sigsqe*xsxsinv*
           (xstar`*Irsdltxs+ Q)*
           xsxsinv;

   olsnm={INTERCEP %olsrhs LAMBDA};
   print ,"Consistent Asymptotic Covariance Matrix of Estimates",
          "in Second Stage (OLS)",asyvcov[r=olsnm c=olsnm format=12.6];

   asyse=sqrt(vecdiag(asyvcov));

   /* NOTE(review): the name INTERCEP assumes PROC REG stores the
      intercept estimate under that name in the OUTEST= dataset;
      confirm on your SAS release. */
   use olsest;
   read all var{INTERCEP %olsrhs LAMBDA} into coeff;

   /* coeff is a 1 x k row vector (one row of olsest), so the number
      of estimated coefficients is ncol(coeff); the t distribution
      uses n-k degrees of freedom. */
   variable=coeff`||asyse||(coeff`/asyse)||
            2*(1-probt(abs(coeff`/asyse),numrowe-ncol(coeff)));

   colnm={"Coeff." "Std. Err." "T-Ratio" "P Value"};
   print ,,,"Parameter Estimates and ",
            "Consistent Asymptotic Standard Errors of Estimates",
            "in Second Stage (OLS)",variable[r=olsnm c=colnm format=12.4];

quit;
endsas;