Survival Curve for Interval Censored Data

/****************************************************************/
/*          S A S   S A M P L E   L I B R A R Y                 */
/*                                                              */
/*    NAME: IMNLPEX6                                            */
/*   TITLE: Survival Curve for Interval Censored Data           */
/* PRODUCT: IML                                                 */
/*  SYSTEM: ALL                                                 */
/*    KEYS:                                                     */
/*   PROCS: IML                                                 */
/*    DATA:                                                     */
/*                                                              */
/* SUPPORT: saswmh/sasghg               UPDATE: frwick 5/2015   */
/*     REF: Technical Report Examples                           */
/*    MISC:                                                     */
/*                                                              */
/****************************************************************/

data carcin;
   input id l r @@;
   datalines;
1  20  22    11  30  32     21  22  24    31  34  36
2  22  24    12  32  34     22  22  24    32  34  36
3  26  28    13  32  34     23  28  30    33  36  38
4  26  28    14  32  34     24  28  30    34  38  40
5  26  28    15  34  36     25  32  34    35  38  40
6  26  28    16  36  38     26  32  34    36  42  44
7  28  30    17  42  44     27  32  34    37  42  44
8  28  30    18  30  50     28  32  34    38  46  48
9  30  32    19  34  50     29  32  34    39  28  50
10 30  32    20  20  22     30  32  34    40  48  50
;

proc iml;
use carcin;
read all var{l r};
close carcin;

nobs= nrow(l);
/*********************************************************
   construct the nonoverlapping intervals (Q,P) and
   determine the number of pseudo-parameters (NPARM)
*********************************************************/
pp= unique(r); npp= ncol(pp);
qq= unique(l); nqq= ncol(qq);
q= j(1,npp, .);
do;
   do i= 1 to npp;
      do j= 1 to nqq;
         if ( qq[j] < pp[i] ) then q[i]= qq[j];
      end;
      if q[i] = qq[nqq] then goto lab1;
   end;
lab1:
end;

if i > npp then nq= npp;
else            nq= i;
q= unique(q[1:nq]);
nparm= ncol(q);
p= j(1,nparm, .);
do i= 1 to nparm;
   do j= npp to 1 by -1;
      if ( pp[j] > q[i] ) then p[i]= pp[j];
   end;
end;

/********************************************************
   generate the X-matrix for the likelihood
********************************************************/
_x= j(nobs, nparm, 0);
do j= 1 to nparm;
   _x[,j]= choose(l <= q[j]  & p[j] <= r, 1, 0);
end;

/********************************************************
  log-likelihood function (LL)
********************************************************/
start LL(theta) global(_x,nparm);
   xlt= log(_x * theta`);
   f= xlt[+];
   return(f);
finish LL;

/********************************************************
  gradient vector (GRAD)
*******************************************************/
start GRAD(theta) global(_x,nparm);
   g= j(1,nparm,0);
   tmp= _x # (1 /  (_x * theta`) );
   g= tmp[+,];
   return(g);
finish GRAD;

/*************************************************************
  estimate the pseudo-parameters using quasi-newton technique
*************************************************************/
/* options */
optn= {1 2};

/* constraints */
con= j(3, nparm + 2, .);
con[1, 1:nparm]= 1.e-6;
con[2:3, 1:nparm]= 1;
con[3,nparm + 1]=0;
con[3,nparm + 2]=1;

/* initial estimates */
x0= j(1, nparm, 1/nparm);

/* call the optimization routine */
call nlpqn(rc,rx,"LL",x0,optn,con,,,,"GRAD");

/*************************************************************
 survival function estimate (SDF)
************************************************************/
tmp1= cusum(rx[nparm:1]);
sdf= tmp1[nparm-1:1];

/*************************************************************
 covariance matrix of the first nparm-1 pseudo-parameters (SIGMA2)
*************************************************************/
mm= nparm - 1;
_x= _x - _x[,nparm] * (j(1, mm, 1) || {0});
h= j(mm, mm, 0);
ixtheta= 1 / (_x * ((rx[,1:mm]) || {1})`);
if _zfreq then
   do i= 1 to nobs;
      rowtmp= ixtheta[i] # _x[i,1:mm];
      h= h + (_freq[i] # (rowtmp` * rowtmp));
   end;
else do i= 1 to nobs;
   rowtmp= ixtheta[i] # _x[i,1:mm];
   h= h + (rowtmp` * rowtmp);
end;
sigma2= inv(h);

/*************************************************************
 standard errors of the estimated survival curve (SIGMA3)
*************************************************************/
sigma3= j(mm, 1, 0);
tmp1= sigma3;
do i= 1 to mm;
   tmp1[i]= 1;
   sigma3[i]= sqrt(tmp1` * sigma2 * tmp1);
end;

/*************************************************************
 95% confidence limits for the survival curve (LCL,UCL)
*************************************************************/

/* confidence limits */
tmp1= probit(.975);
*print tmp1;
tmp1= tmp1 * sigma3;
lcl= choose(sdf > tmp1, sdf - tmp1, 0);
ucl= sdf + tmp1;
ucl= choose( ucl > 1., 1., ucl);

/*************************************************************
 print estimates of pseudo-parameters
*************************************************************/
q= q`;
p= p`;
theta= rx`;
ParamEst = q || p || theta;
print ParamEst[L="Parameter Estimates"
              colname={Q P THETA} format=BEST9.];

/*************************************************************
 print survival curve estimates and confidence limits
*************************************************************/
left= {0} // p;
right= q // p[nparm];
sdf= {1} // sdf // {0};
lcl= {.} // lcl //{.};
ucl= {.} // ucl //{.};
SurvEst = left || right || sdf || lcl || ucl;
print SurvEst[L="Survival Curve Estimates and 95% Confidence Intervals"
          colname={left right estimate lower upper}];
quit;