Various Illustrations of Splines in PROC TRANSREG
/****************************************************************/
/* S A S S A M P L E L I B R A R Y */
/* */
/* NAME: TREGSPL */
/* TITLE: Various Illustrations of Splines in PROC TRANSREG */
/* PRODUCT: STAT */
/* SYSTEM: ALL */
/* KEYS: regression analysis, transformations */
/* PROCS: TRANSREG */
/* DATA: */
/* */
/* SUPPORT: saswfk UPDATE: July 25, 2010 */
/* REF: PROC TRANSREG, DETAILS, SPLINES */
/* MISC: */
/****************************************************************/
title 'An Illustration of Splines and Knots';
* Create in y a discontinuous function of x.;
data a;
x = -0.000001;
do i = 0 to 199;
if mod(i, 50) = 0 then do;
c = ((x / 2) - 5)**2;
if i = 150 then c = c + 5;
y = c;
end;
x = x + 0.1;
y = y - sin(x - c);
output;
end;
run;
ods graphics on;
title2 'A Linear Regression Fit';
proc transreg data=a plots=scatter rsquare;
model identity(y) = identity(x);
run;
title2 'A Quadratic Polynomial Fit';
proc transreg data=A;
model identity(y)=spline(x / degree=2);
run;
title2 'A Cubic Spline Fit with Knots at X=5, 10, 15';
proc transreg data=a;
model identity(y) = spline(x / knots=5 10 15);
run;
data b; /* A is the data set used by transreg */
set a(keep=x y);
x1=x; /* x */
x2=x**2; /* x squared */
x3=x**3; /* x cubed */
x4=(x> 5)*((x-5)**3); /* change in x**3 after 5 */
x5=(x>10)*((x-10)**3); /* change in x**3 after 10 */
x6=(x>15)*((x-15)**3); /* change in x**3 after 15 */
run;
proc reg;
model y=x1-x6;
quit;
title3 'First - Third Derivatives Discontinuous at X=5, 10, 15';
proc transreg data=a;
model identity(y) = spline(x / knots=5 5 5 10 10 10 15 15 15);
run;
data b;
set a(keep=x y);
x1=x; /* x */
x2=x**2; /* x squared */
x3=x**3; /* x cubed */
x4=(x>5) * (x- 5); /* change in x after 5 */
x5=(x>10) * (x-10); /* change in x after 10 */
x6=(x>15) * (x-15); /* change in x after 15 */
x7=(x>5) * ((x-5)**2); /* change in x**2 after 5 */
x8=(x>10) * ((x-10)**2); /* change in x**2 after 10 */
x9=(x>15) * ((x-15)**2); /* change in x**2 after 15 */
x10=(x>5) * ((x-5)**3); /* change in x**3 after 5 */
x11=(x>10) * ((x-10)**3); /* change in x**3 after 10 */
x12=(x>15) * ((x-15)**3); /* change in x**3 after 15 */
run;
proc reg;
model y=x1-x12;
quit;
title3 'Discontinuous Function and Derivatives';
proc transreg data=a;
model identity(y) = spline(x / knots=5 5 5 5 10 10 10 10
15 15 15 15);
run;
title3 'Four Knots';
proc transreg data=a;
model identity(y) = spline(x / nknots=4);
run;
title3 'Nine Knots';
proc transreg data=a;
model identity(y) = spline(x / nknots=9);
run;
title 'An Illustration of Splines and Knots';
title2 'Scoring Spline Variables';
data x;
do i = 1 to 5000;
w = normal(7);
x = normal(7);
z = normal(7);
y = w * w + log(5 + x) + sin(z) + normal(7);
output;
end;
run;
data z;
do i = 1 to 5000;
w = normal(1);
x = normal(1);
z = normal(1);
y = w * w + log(5 + x) + sin(z) + normal(1);
output;
end;
run;
proc transreg data=x solve details ss2;
ods output splinecoef=c;
model identity(y) = spline(w x z / knots=-1.5 to 1.5 by 0.5
exknots=-5 5);
output out=d;
run;
proc transreg data=z design;
model bspl(w x z / knots=-1.5 to 1.5 by 0.5 exknots=-5 5);
output out=b;
run;
proc score data=b score=c out=o1(rename=(spline=bw w=nw));
var w:;
run;
proc score data=b score=c out=o2(rename=(spline=bx x=nx));
var x:;
run;
proc score data=b score=c out=o3(rename=(spline=bz z=nz));
var z:;
run;
data all;
merge d(keep=w x z tw tx tz) o1(keep=nw bw)
o2(keep=nx bx) o3(keep=nz bz);
run;
proc template;
define statgraph twobytwo;
begingraph;
layout lattice / rows=2 columns=2;
layout overlay;
seriesplot y=tw x=w / connectorder=xaxis;
seriesplot y=bw x=nw / connectorder=xaxis;
endlayout;
layout overlay;
seriesplot y=tx x=x / connectorder=xaxis;
seriesplot y=bx x=nx / connectorder=xaxis;
endlayout;
layout overlay;
seriesplot y=tz x=z / connectorder=xaxis;
seriesplot y=bz x=nz / connectorder=xaxis;
endlayout;
endlayout;
endgraph;
end;
run;
proc sgrender data=all template=twobytwo;
run;
data x;
input x @@;
datalines;
1 2 3 4 5 6 7 8 9 10
;
ods output details=d;
proc transreg details design;
model bspline(x / nkn=3);
output out=y;
run;
%let k = 0;
data d;
set d;
length d $ 20;
retain d ' ';
if description ne ' ' then d = description;
if d = 'Degree' then call symput('d', compress(formattedvalue));
if d = 'Number of Knots'
then call symput('k', compress(formattedvalue));
if index(d, 'Knots') and not index(d, 'Number');
keep d numericvalue;
run;
%let nkn = %eval(&d * 2 + &k); /* total number of knots */
%let nb = %eval(&d + 1 + &k); /* number of cols in basis */
proc transpose data=d out=k(drop=_name_) prefix=Knot;
run;
proc print; format k: 20.16;
run;
data b(keep=x:);
if _n_ = 1 then set k; /* read knots from transreg */
array k[&nkn] knot1-knot&nkn; /* knots */
array b[&nb] x_0 - x_%eval(&nb - 1); /* basis */
array w[%eval(2 * &d)]; /* work */
set x;
do i = 1 to &nb; b[i] = 0; end;
* find the index of first knot greater than current data value;
do ki = 1 to &nkn while(k[ki] le x); end;
kki = ki - &d - 1;
* make the basis;
b[1 + kki] = 1;
do j = 1 to &d;
w[&d + j] = k[ki + j - 1] - x;
w[j] = x - k[ki - j];
s = 0;
do i = 1 to j;
t = w[&d + i] + w[j + 1 - i];
if t ne 0.0 then t = b[i + kki] / t;
b[i + kki] = s + w[&d + i] * t;
s = w[j + 1 - i] * t;
end;
b[j + 1 + kki] = s;
end;
run;
proc compare data=y(keep=x:) compare=b
criterion=1e-12 note nosummary;
title3 "should be no differences";
run;
title 'Linear and Nonlinear Regression Functions';
* Generate an Artificial Nonlinear Scatter Plot;
data a;
do i = 1 to 500;
x = i / 2.5;
y = -((x/50)-1.5)**2 + sin(x/8) + sqrt(x)/5 + 2*log(x) + cos(x);
x = x / 21;
if y > 2 then output;
end;
run;
ods select fitplot(persist);
title2 'Linear Regression';
proc transreg data=a;
model identity(y)=identity(x);
run;
title2 'A Monotone Regression Function';
proc transreg data=a;
model identity(y)=mspline(x / nknots=9);
run;
title2 'A Nonlinear Regression Function';
proc transreg data=a;
model identity(y)=spline(x / nknots=9);
run;
title2 'A Nonlinear Regression Function, 100 Knots';
proc transreg data=a;
model identity(y)=spline(x / nknots=100);
run;
ods select all;
title 'Separate Curves, Separate Intercepts';
data a;
do x = -2 to 3 by 0.025;
g = 1;
y = 8*(x*x + 2*cos(x*6)) + 15*normal(7654321);
output;
g = 2;
y = 4*(-x*x + 4*sin(x*4)) - 40 + 15*normal(7654321);
output;
end;
run;
ods select fitplot(persist);
title 'Parallel Lines, Separate Intercepts';
proc transreg data=a solve;
model identity(y)=class(g) identity(x);
run;
title 'Parallel Monotone Curves, Separate Intercepts';
proc transreg data=a;
model identity(y)=class(g) mspline(x / knots=-1.5 to 2.5 by 0.5);
run;
title 'Parallel Curves, Separate Intercepts';
proc transreg data=a solve;
model identity(y)=class(g) spline(x / knots=-1.5 to 2.5 by 0.5);
run;
title 'Separate Slopes, Same Intercept';
proc transreg data=a;
model identity(y)=class(g / zero=none) * identity(x);
run;
title 'Separate Monotone Curves, Same Intercept';
proc transreg data=a;
model identity(y) = class(g / zero=none) *
mspline(x / knots=-1.5 to 2.5 by 0.5);
run;
title 'Separate Curves, Same Intercept';
proc transreg data=a solve;
model identity(y) = class(g / zero=none) *
spline(x / knots=-1.5 to 2.5 by 0.5);
run;
title 'Separate Slopes, Separate Intercepts';
proc transreg data=a;
model identity(y) = class(g / zero=none) | identity(x);
run;
title 'Separate Monotone Curves, Separate Intercepts';
proc transreg data=a;
model identity(y) = class(g / zero=none) |
mspline(x / knots=-1.5 to 2.5 by 0.5);
run;
title 'Separate Curves, Separate Intercepts';
proc transreg data=a solve;
model identity(y) = class(g / zero=none) |
spline(x / knots=-1.5 to 2.5 by 0.5);
run;
ods select all;