Regression Quantiles
/****************************************************************/
/* SAS SAMPLE LIBRARY */
/* */
/* NAME: REGQUANT */
/* TITLE: Regression Quantiles */
/* PRODUCT: IML */
/* SYSTEM: ALL */
/* KEYS: MATRIX REGR */
/* PROCS: IML */
/* DATA: */
/* */
/* SUPPORT: Rick Wicklin UPDATE: SEP2013 */
/* REF: */
/* MISC: */
/* HIST: Updated for SAS V8.2 (lichen) */
/* */
/****************************************************************/
proc iml;
/*---------------------------------------------------------*/
/* Routine to find regression quantiles */
/* yname: name of dependent variable */
/* y: dependent variable */
/* xname: names of independent variables */
/* X: independent variables */
/* b: estimates */
/* predict: predicted values */
/* error: difference of y and predicted. */
/* q: quantile */
/*---------------------------------------------------------*/
start rq( yname, y, xname, X, b, predict, error, q);
bound=1.0e10;
coef = X`;
m = nrow(coef);
n = ncol(coef);
/*-----------------build rhs and bounds--------------------*/
e = repeat(1,1,n)`;
r = {0 0} || ((1-q)*coef*e)`;
sign = repeat(1,1,m);
do i=1 to m;
if r[2+i] < 0 then do;
sign[i] = -1;
r[2+i] = -r[2+i];
coef[i,] = -coef[i,];
end;
end;
l = repeat(0,1,n) || repeat(0,1,m) || -bound || -bound ;
u = repeat(1,1,n) || repeat(0,1,m) || { . . } ;
/*-------------build coefficient matrix and basis----------*/
a = ( y` || repeat(0,1,m) || { -1 0 } ) //
( repeat(0,1,n) || repeat(-1,1,m) || { 0 -1 } ) //
( coef || I(m) || repeat(0,m,2)) ;
/*----------------find the optimal solution----------------*/
cost = j(1,n+m+2,0);
cost[n+m+1] = 1.0;
call lpsolve(rc,optimum,p,d,rcost,
cost,a,r,-1,j(m+2,1,'E'),,l,u);
/*---------------- report the solution-----------------------*/
variable = xname`; b=d[3:m+2];
do i=1 to m;
b[i] = b[i] * sign[i];
end;
predict = X*b;
error = y - predict;
wsum = sum ( choose(error<0 , (q-1)*error , q*error) );
label = 'Estimation for ' + yname;
desc = q // n // wsum;
rownames = {'Regression Quantile', 'Number of Observations',
'Sum of Weighted Absolute Errors'};
print desc[L=label r=rownames];
print b[r=variable];
print X y predict error;
finish rq;
z = { 3.929 1790 ,
5.308 1800 ,
7.239 1810 ,
9.638 1820 ,
12.866 1830 ,
17.069 1840 ,
23.191 1850 ,
31.443 1860 ,
39.818 1870 ,
50.155 1880 ,
62.947 1890 ,
75.994 1900 ,
91.972 1910 ,
105.710 1920 ,
122.775 1930 ,
131.669 1940 ,
151.325 1950 ,
179.323 1960 ,
203.211 1970 };
y=z[,1];
x=repeat(1,19,1)||z[,2]||z[,2]##2;
run rq('pop',y,{'intercpt' 'year' 'yearsq'},x,b1,pred,resid,.5);
/* Compare L1 residuals with least squares residuals */
/* Compute the least squares residuals */
LSResid=y-x*inv(x`*x)*x`*y;
L1Resid = resid;
t = z[,2];
create Residuals var{t L1Resid LSResid}; append; close Residuals;
quit;
proc sgplot data=Residuals;
scatter x=t y=L1Resid / LEGENDLABEL= "L(1) residuals";
scatter x=t y=LSResid / LEGENDLABEL= "Least squares residuals";
yaxis label="Residuals";
refline 0 / axis=y;
run;
proc iml;
z = { 3.929 1790 ,
5.308 1800 ,
7.239 1810 ,
9.638 1820 ,
12.866 1830 ,
17.069 1840 ,
23.191 1850 ,
31.443 1860 ,
39.818 1870 ,
50.155 1880 ,
62.947 1890 ,
75.994 1900 ,
91.972 1910 ,
105.710 1920 ,
122.775 1930 ,
131.669 1940 ,
151.325 1950 ,
179.323 1960 ,
203.211 1970 };
y=z[,1];
x=repeat(1,19,1)||z[,2]||z[,2]##2;
b0 = {1 1 1}; /* initial value */
optn = j(4,1,.); /* options vector */
optn[1]= .; /* gamma default */
optn[2]= 5; /* print all */
optn[3]= 0; /* McKean-Schradar variance */
optn[4]= 1; /* convergence test */
call LAV(rc, xr, x, y, b0, optn);