Resources

Regression Quantiles

/****************************************************************/
/*          SAS SAMPLE LIBRARY                                  */
/*                                                              */
/*    NAME: REGQUANT                                            */
/*   TITLE: Regression Quantiles                                */
/* PRODUCT: IML                                                 */
/*  SYSTEM: ALL                                                 */
/*    KEYS: MATRIX  REGR                                        */
/*   PROCS: IML                                                 */
/*    DATA:                                                     */
/*                                                              */
/* SUPPORT: Rick Wicklin                UPDATE: SEP2013         */
/*     REF:                                                     */
/*    MISC:                                                     */
/*    HIST: Updated for SAS V8.2 (lichen)                       */
/*                                                              */
/****************************************************************/

proc iml;
/*---------------------------------------------------------*/
/* Routine to find regression quantiles                    */
/* yname: name of dependent variable                       */
/* y: dependent variable                                   */
/* xname: names of independent variables                   */
/* X: independent variables                                */
/* b: estimates                                            */
/* predict: predicted values                               */
/* error: difference of y and predicted.                   */
/* q: quantile                                             */
/*---------------------------------------------------------*/
start rq( yname, y, xname, X, b, predict, error, q);
  bound=1.0e10;
  coef = X`;
  m = nrow(coef);
  n = ncol(coef);
  /*-----------------build rhs and bounds--------------------*/
  e = repeat(1,1,n)`;
  r = {0 0} || ((1-q)*coef*e)`;
  sign = repeat(1,1,m);
  do i=1 to m;
    if r[2+i] < 0 then do;
      sign[i] = -1;
      r[2+i] = -r[2+i];
      coef[i,] = -coef[i,];
    end;
  end;
  l = repeat(0,1,n) || repeat(0,1,m) || -bound || -bound ;
  u = repeat(1,1,n) || repeat(0,1,m) || { . . } ;
  /*-------------build coefficient matrix and basis----------*/
  a = ( y` || repeat(0,1,m) || { -1 0 } ) //
  ( repeat(0,1,n) || repeat(-1,1,m) || { 0 -1 } ) //
  ( coef || I(m) || repeat(0,m,2)) ;
  /*----------------find the optimal solution----------------*/
  cost = j(1,n+m+2,0);
  cost[n+m+1] = 1.0;
  call lpsolve(rc,optimum,p,d,rcost,
               cost,a,r,-1,j(m+2,1,'E'),,l,u);

  /*---------------- report the solution-----------------------*/
  variable = xname`; b=d[3:m+2];
  do i=1 to m;
    b[i] = b[i] * sign[i];
  end;
  predict = X*b;
  error = y - predict;
  wsum = sum ( choose(error<0 , (q-1)*error , q*error) );

  label = 'Estimation for ' + yname;
  desc = q // n // wsum;
  rownames = {'Regression Quantile', 'Number of Observations',
              'Sum of Weighted Absolute Errors'};
  print desc[L=label r=rownames];
  print b[r=variable];
  print X y predict error;
finish rq;

z = { 3.929 1790 ,
      5.308 1800 ,
      7.239 1810 ,
      9.638 1820 ,
     12.866 1830 ,
     17.069 1840 ,
     23.191 1850 ,
     31.443 1860 ,
     39.818 1870 ,
     50.155 1880 ,
     62.947 1890 ,
     75.994 1900 ,
     91.972 1910 ,
    105.710 1920 ,
    122.775 1930 ,
    131.669 1940 ,
    151.325 1950 ,
    179.323 1960 ,
    203.211 1970 };

y=z[,1];
x=repeat(1,19,1)||z[,2]||z[,2]##2;
run rq('pop',y,{'intercpt' 'year' 'yearsq'},x,b1,pred,resid,.5);
/* Compare L1 residuals with least squares residuals  */
/* Compute the least squares residuals                */
LSResid=y-x*inv(x`*x)*x`*y;
L1Resid = resid;
t = z[,2];
create Residuals var{t L1Resid LSResid}; append; close Residuals;
quit;

proc sgplot data=Residuals;
  scatter x=t y=L1Resid / LEGENDLABEL= "L(1) residuals";
  scatter x=t y=LSResid / LEGENDLABEL= "Least squares residuals";
  yaxis label="Residuals";
  refline 0 / axis=y;
  run;

proc iml;
z = { 3.929 1790 ,
      5.308 1800 ,
      7.239 1810 ,
      9.638 1820 ,
     12.866 1830 ,
     17.069 1840 ,
     23.191 1850 ,
     31.443 1860 ,
     39.818 1870 ,
     50.155 1880 ,
     62.947 1890 ,
     75.994 1900 ,
     91.972 1910 ,
    105.710 1920 ,
    122.775 1930 ,
    131.669 1940 ,
    151.325 1950 ,
    179.323 1960 ,
    203.211 1970 };
y=z[,1];
x=repeat(1,19,1)||z[,2]||z[,2]##2;

b0 = {1 1 1};              /* initial value             */
optn = j(4,1,.);           /* options vector            */

optn[1]= .;                /* gamma default             */
optn[2]= 5;                /* print all                 */
optn[3]= 0;                /* McKean-Schradar variance  */
optn[4]= 1;                /* convergence test          */

call LAV(rc, xr, x, y, b0, optn);