Miscellaneous Examples, GENMOD Procedure

 /****************************************************************/
 /*          S A S   S A M P L E   L I B R A R Y                 */
 /*                                                              */
 /*    NAME: GENMISC                                             */
 /*   TITLE: Miscellaneous Examples, GENMOD Procedure            */
 /* PRODUCT: STAT                                                */
 /*  SYSTEM: ALL                                                 */
 /*    KEYS: generalized linear models,                          */
 /*   PROCS: GENMOD                                              */
 /* SUPPORT: SASGJJ                                              */
 /*     REF:                                                     */
 /*    MISC:                                                     */
 /****************************************************************/


 /*****************************************************************
 | Negative binomial regression.                                  |
 | Code models count data using SAS/STAT GENMOD Procedure.        |
 | 0 responses are accommodated. The example data set is a subset |
 | of a study on predictors for seizures performed by the author. |
 | Number of seizures is the response while the log of the        |
 | population sharing a similar pattern of covariates is used as  |
 | an offset. Indicator predictors are psy=psychological problems;|
 | cvd = cardio-vascular disease; tum = tumor. A model using      |
 | standard Poisson regression proves to be highly overdispersed. |
 | The data may be satisfactorily modeled using a quadratic       |
 | specification (GLM) of the log-linked negative binomial with a |
 | variance adjustment parameter, k, of .14. The user provides a  |
 | value to k which results in the DEV/DF approximating 1.0. Also |
 | check the CHI2/DF - if the value is within 0.8 to 1.2, the     |
 | model is correctly specified. Code for the canonical link and  |
 | inverse link is provided; however, the canonical form does not |
 | converge with this data.                                       |
 *****************************************************************/

data seize;
   input seiz pop psy cvd tum;
   lpop=log(pop);
   datalines;
  0      8   1   0   0
  0     15   0   0   0
  0      5   1   0   0
  3     90   1   0   0
  3     89   0   1   0
651  24795   1   0   0
  2     37   0   0   0
  7    234   0   0   0
  0     14   0   0   0
  0      6   0   1   0
 10    659   0   0   0
  3     64   0   1   0
  4    184   0   0   0
  0      9   0   1   0
  3     62   1   1   0
  0      6   1   0   0
  0     15   0   0   0
  0      7   1   0   0
  0     10   0   1   0
 35    589   1   0   0
  0     84   0   0   0
  1     35   0   0   0
  4    346   0   0   0
  0     11   0   0   0
765 199087   0   0   0
  0      6   0   0   0
  9    141   0   1   0
  2     38   0   0   0
  2    133   0   0   0
  1      7   1   0   0
  0      7   0   0   0
  2     19   1   0   0
  0      7   1   0   0
  0      5   0   0   0
  0      7   1   1   0
  3    261   0   0   0
  5    144   1   0   0
  2    334   0   0   0
  0    12    0   0   0
  9   205    0   1   0
 63  2152    0   0   0
  4   353    0   0   0
  2    10    0   0   1
  0    11    0   0   0
  0    10    0   1   0
  0   206    0   0   0
 31  2957    0   0   0
  2     8    0   0   1
  0     6    0   1   0
  0    15    1   0   0
  1    98    0   0   0
  0     5    1   0   0
  9    78    0   1   0
  1    25    0   1   0
  0     6    0   1   0
  6   256    0   0   0
;

 /* Data is first modeled using Poisson regression */
proc genmod data=seize;
   model seiz = psy cvd tum / dist   = poisson
                              offset = lpop;
run;

 /*  Data is now modeled using a
     log-linked negative binomial, k=.14  */

proc genmod data=seize;
   k = .14;                  /* adjusted by user */
   a = _MEAN_;
   y = _RESP_;

   /* canonical link:  fwdlink link = log(a/(a+k)); */
   /* canonical ilink: invlink ilink = k/(exp(-_XBETA_)-1); */
   variance var = a+k*a*a;
   if (y>0) then d = 2 * (y*log(y/a)-(1+k*y)/k *
                                 log((1+k*y)/(1+k*a)));
   else if (y=0) then d = 2 * log(1+k*a)/k; /* allows 0 response */
   deviance dev = d;
   model seiz= psy cvd tum  / offset = lpop noscale link = log;
run;

/**************************************************************/
   *---------------Poisson Polynomial Regression-------*
   *---------------------------------------------------*;
/**************************************************************/
data poi;
   label c='Response';
   keep c x;
   phi = 5;
   seedg = 94571;
   seedp = 7366;
   do i = 1500 to 2500 by 100;
      x = (i-2000)/1000;
      poly =  - (2.5 * x) - (6 * x * x) + (40 * x * x * x);
      mu1 = exp( poly ) * 100;
      alpha = 1/phi;
      beta = mu1*phi;
      call rangam( seedg, beta, mu );
      mu = alpha * mu;
      call ranpoi( seedp, mu, c );
      output;
   end;
run;

proc genmod data = poi;
   model c = x x*x x*x*x x*x*x*x / dist = poi
                                   link = log
                                   type1;
run;

proc genmod data = poi;
   ods output Obstats = pred;
   model c = x x*x x*x*x / dist = poi
                           link = log
                           obstats
                           pscale
                           type1;
run;

data new;
   set poi( keep = x );
   set pred ( keep = c pred reschi resdev );
run;


proc sgplot data=new;
   series  x=x y=pred;
   scatter x=x y=c;
   xaxis label = 'Explanatory Variable X';
   yaxis label = 'Response';
run;

proc sgplot data=new;
   scatter x=x y=reschi;
   xaxis label = 'Explanatory Variable X';
   yaxis label = 'Pearson Residual';
run;

proc sgplot data=new;
   scatter x=x y=resdev;
   xaxis label = 'Explanatory Variable X';
   yaxis label = 'Deviance Residual';
run;

   /*---------------Component Reliability Example------------*/
   /*--------------------------------------------------------*/

data equip;
   input Year Month $ usehr rem @@;
   if usehr > 0 then lusehr = log(usehr);
   else lusehr=.;
   datalines;
1983  Jan    0 0   1983  Feb   10 0   1983  Mar   26 0
1983  Apr  192 0   1983  May  238 0   1983  Jun  374 1
1983  Jul  356 0   1983  Aug  358 4   1983  Sep  594 0
1983  Oct  786 1   1983  Nov  885 2   1983  Dec  981 2
1984  Jan 1044 0   1984  Feb 1266 1   1984  Mar 1533 1
1984  Apr 1510 2   1984  May 1818 2   1984  Jun 2210 7
1984  Jul 2003 2   1984  Aug 2489 4   1984  Sep 2841 2
1984  Oct 3236 3   1984  Nov 3104 5   1984  Dec 2919 3
1985  Jan 2849 3   1985  Feb 3119 2   1985  Mar 3596 7
1985  Apr 4003 5   1985  May 4067 5   1985  Jun 3690 2
1985  Jul 3509 2   1985  Aug 3653 9   1985  Sep 4186 3
1985  Oct 4562 2   1985  Nov 4277 2   1985  Dec 3838 2
1986  Jan 4253 3   1986  Feb 4242 5   1986  Mar 5119 5
1986  Apr 5281 7   1986  May 5163 4   1986  Jun 4977 2
1986  Jul 4663 4   1986  Aug 5465 4   1986  Sep 5314 5
1986  Oct 5485 4   1986  Nov 5688 6   1986  Dec 5403 0
1987  Jan 5749 2   1987  Feb 5682 4   1987  Mar 6460 3
1987  Apr 6681 3   1987  May 7215 3   1987  Jun 6650 8
1987  Jul 7094 2   1987  Aug 6600 6   1987  Sep 7649 3
1987  Oct 7615 9   1987  Nov 6733 4   1987  Dec 6540 10
1988  Jan 6680 4   1988  Feb 7646 6   1988  Mar 8139 4
1988  Apr 7829 4   1988  May 8220 3   1988  Jun 7671 5
1988  Jul 7120 3   1988  Aug 7293 4   1988  Sep 8045 5
1988  Oct 8567 3   1988  Nov 7682 6   1988  Dec 7048 3
1989  Jan 7369 2   1989  Feb 7270 6   1989  Mar 8124 1
1989  Apr 7636 5   1989  May 7512 5   1989  Jun 7049 4
1989  Jul 7286 2   1989  Aug 7624 2   1989  Sep 7623 2
1989  Oct 7970 5   1989  Nov 7569 1   1989  Dec 7156 10
1990  Jan 7404 3   1990  Feb 7447 8   1990  Mar 7951 12
1990  Apr 8065 7   1990  May 7742 3   1990  Jun 7109 2
1990  Jul 7229 4   1990  Aug 7279 3   1990  Sep 7366 0
1990  Oct 7955 6   1990  Nov 7044 6   1990  Dec 3929 3
;

proc genmod data = equip order=data;
   class month year;
   model rem = month year / dist=poisson
                            link=log
                            offset = lusehr
                            type1
                            type3;
run;

proc genmod data = equip order=data;
   class month year;
   model rem = month year / dist=poisson
                            link=log
                            offset = lusehr
                            type1
                            type3
                            dscale;
run;


proc genmod data = equip order=data;
   class month year;
   ods output  Obstats = outdata;
   model rem =  year / dist=poisson
                       link=log
                       offset = lusehr
                       type1
                       type3
                       obstats;
run;

data rates;
   set outdata;
   if month = 'dec';
   rate = exp(xbeta);
run;

proc print;
   var year rate;
run;

proc genmod data = equip order=data;
   class month year;
   model rem = year / dist=poisson
                      link=log
                      offset = lusehr;
   contrast 'Year86-90' year 0 0 0 1 -1,
                        year 0 0 0 1 0 -1,
                        year 0 0 0 1 0 0 -1,
                        year 0 0 0 1 0 0 0 -1;
run;


   /*---------------Modeling Variance Heterogeneity----------*/
   /* Aitkin, M. (1987), Modelling Variance Heterogeneity in */
   /* Normal Regression Using GLIM, Appl. Statist. 36, No. 3,*/
   /* pp 332-339                                             */
   /*--------------------------------------------------------*/

data semi;
   drop j;
   do p = 'a','b';
      do x = 10 to 15;
         if p = 'a' then do;
            do j = 1 to 2;
               y = x + 2*rannor(1302);
               output;
            end;
         end;
         else if p = 'b' then do;
            do j = 1 to 2;
            y = 3*x + 5*rannor(4567);
            output;
            end;
         end;
      end;
   end;
run;

%macro semimod;

   ods listing close;
   ods output;

   data work;
      * input data set;
      set semi;
      wgt = 1;
      sum = 0;
   run;

   data tmp;
      sum = 0;
   run;

   %let conv = 0;
   %let iter = 0;
   * iterate until convergence;
   %do %while( &conv = 0 );

      data _old;
         set tmp;
         devold = sum;
         keep devold;
      run;

      * mean model;
      * set NOSCALE so that scale is not estimated;
      * select OBSTATS to get residuals;
      * SCWGT selects dispersion parameter weights;
      proc genmod data=work;
         class p;
         scwgt wgt;
         ods output Obstats = A;
         ods output ParameterEstimates = meanests;
         model y = p x p*x / noscale
                             obstats;
      run;

      * this data set contains squared residuals;
      data work;
         drop sum pred xbeta std hesswgt upper lower
              resraw reschi resdev;
         set A;
         set work;
         rsquare = resraw*resraw;
      run;

      * variance model;
      * set scoring=100 to get Fisher scores;
      * set scale = .5 for 1 dof in gamma distribution;
      proc genmod data=work;
         class p;
         ods output  Obstats = C;
         ods output  ParameterEstimates = varests;
         model rsquare = p x p*x / dist=gamma
                                   link=log
                                   obstats
                                   scoring=100
                                   noscale
                                   scale=.5;
      run;

      * get weights for 1st model;
      * compute sum = overall deviance;
      data work;
         drop xbeta std hesswgt upper lower resraw reschi resdev;
         set work;
         set C;
         wgt = 1./pred;
         sum + (rsquare/pred + log(pred) + log(6.283185));
      run;

      data tmp;
         set work nobs = last;
         keep sum;
         if( _n_ = last );
      run;

      %let iter=%eval( &iter+1 );
      %put &iter;

      * check convergence;
      data _NULL_;
         set tmp;
         set _old;
         put sum devold;
         if( abs(sum - devold) <= 1.e-3 ) then conv = 1;
         else conv = 0;
         call symput( 'dev', left(put(sum,12.4)) );
         call symput( 'conv', left(put(conv,3.)) );
      run;

   %end;

   title 'Mean Model';
   ods listing;
   proc print data=meanests;
   run;

   title 'Variance Model';
   proc print data=varests;
   run;

   title;
   %put Number of Iterations: &iter;
   %put Overall Deviance: &dev;
%mend semimod;

%semimod;