## Various Illustrations of Splines in PROC TRANSREG

```/****************************************************************/
/*          S A S   S A M P L E   L I B R A R Y                 */
/*                                                              */
/*    NAME: TREGSPL                                             */
/*   TITLE: Various Illustrations of Splines in PROC TRANSREG   */
/* PRODUCT: STAT                                                */
/*  SYSTEM: ALL                                                 */
/*    KEYS: regression analysis, transformations                */
/*   PROCS: TRANSREG                                            */
/*    DATA:                                                     */
/*                                                              */
/* SUPPORT: saswfk                UPDATE: July 25, 2010         */
/*     REF: PROC TRANSREG, DETAILS, SPLINES                     */
/*    MISC:                                                     */
/****************************************************************/

title 'An Illustration of Splines and Knots';

* Create in y a discontinuous function of x.;

data a;
x = -0.000001;
do i = 0 to 199;
if mod(i, 50) = 0 then do;
c = ((x / 2) - 5)**2;
if i = 150 then c = c + 5;
y = c;
end;
x = x + 0.1;
y = y - sin(x - c);
output;
end;
run;

ods graphics on;

title2 'A Linear Regression Fit';
proc transreg data=a plots=scatter rsquare;
model identity(y) = identity(x);
run;

proc transreg data=A;
model identity(y)=spline(x / degree=2);
run;

title2 'A Cubic Spline Fit with Knots at X=5, 10, 15';

proc transreg data=a;
model identity(y) = spline(x / knots=5 10 15);
run;

data b;             /* A is the data set used by transreg */
set a(keep=x y);
x1=x;                       /* x                       */
x2=x**2;                    /* x squared               */
x3=x**3;                    /* x cubed                 */
x4=(x> 5)*((x-5)**3);       /* change in x**3 after  5 */
x5=(x>10)*((x-10)**3);      /* change in x**3 after 10 */
x6=(x>15)*((x-15)**3);      /* change in x**3 after 15 */
run;

proc reg;
model y=x1-x6;
run; quit;

title3 'First - Third Derivatives Discontinuous at X=5, 10, 15';

proc transreg data=a;
model identity(y) = spline(x / knots=5 5 5 10 10 10 15 15 15);
run;

data b;
set a(keep=x y);
x1=x;                        /* x                       */
x2=x**2;                     /* x squared               */
x3=x**3;                     /* x cubed                 */
x4=(x>5)   * (x- 5);         /* change in x    after  5 */
x5=(x>10)  * (x-10);         /* change in x    after 10 */
x6=(x>15)  * (x-15);         /* change in x    after 15 */
x7=(x>5)   * ((x-5)**2);     /* change in x**2 after  5 */
x8=(x>10)  * ((x-10)**2);    /* change in x**2 after 10 */
x9=(x>15)  * ((x-15)**2);    /* change in x**2 after 15 */
x10=(x>5)  * ((x-5)**3);     /* change in x**3 after  5 */
x11=(x>10) * ((x-10)**3);    /* change in x**3 after 10 */
x12=(x>15) * ((x-15)**3);    /* change in x**3 after 15 */
run;

proc reg;
model y=x1-x12;
run; quit;

title3 'Discontinuous Function and Derivatives';

proc transreg data=a;
model identity(y) = spline(x / knots=5 5 5 5 10 10 10 10
15 15 15 15);
run;

title3 'Four Knots';

proc transreg data=a;
model identity(y) = spline(x / nknots=4);
run;

title3 'Nine Knots';

proc transreg data=a;
model identity(y) = spline(x / nknots=9);
run;

title 'An Illustration of Splines and Knots';
title2 'Scoring Spline Variables';

data x;
do i = 1 to 5000;
w = normal(7);
x = normal(7);
z = normal(7);
y = w * w + log(5 + x) + sin(z) + normal(7);
output;
end;
run;

data z;
do i = 1 to 5000;
w = normal(1);
x = normal(1);
z = normal(1);
y = w * w + log(5 + x) + sin(z) + normal(1);
output;
end;
run;

proc transreg data=x solve details ss2;
ods output splinecoef=c;
model identity(y) = spline(w x z / knots=-1.5 to 1.5 by 0.5
exknots=-5 5);
output out=d;
run;

proc transreg data=z design;
model bspl(w x z / knots=-1.5 to 1.5 by 0.5 exknots=-5 5);
output out=b;
run;

proc score data=b score=c out=o1(rename=(spline=bw w=nw));
var w:;
run;

proc score data=b score=c out=o2(rename=(spline=bx x=nx));
var x:;
run;

proc score data=b score=c out=o3(rename=(spline=bz z=nz));
var z:;
run;

data all;
merge d(keep=w x z tw tx tz) o1(keep=nw bw)
o2(keep=nx bx) o3(keep=nz bz);
run;

proc template;
define statgraph twobytwo;
begingraph;
layout lattice / rows=2 columns=2;
layout overlay;
seriesplot y=tw x=w  / connectorder=xaxis;
seriesplot y=bw x=nw / connectorder=xaxis;
endlayout;
layout overlay;
seriesplot y=tx x=x  / connectorder=xaxis;
seriesplot y=bx x=nx / connectorder=xaxis;
endlayout;
layout overlay;
seriesplot y=tz x=z  / connectorder=xaxis;
seriesplot y=bz x=nz / connectorder=xaxis;
endlayout;
endlayout;
endgraph;
end;
run;

proc sgrender data=all template=twobytwo;
run;

data x;
input x @@;
datalines;
1 2 3 4 5 6 7 8 9 10
;

ods output details=d;
proc transreg details design;
model bspline(x / nkn=3);
output out=y;
run;

%let k = 0;
data d;
set d;
length d \$ 20;
retain d ' ';
if description ne ' ' then d = description;
if d = 'Degree' then call symput('d', compress(formattedvalue));
if d = 'Number of Knots'
then call symput('k', compress(formattedvalue));
if index(d, 'Knots') and not index(d, 'Number');
keep d numericvalue;
run;

%let nkn = %eval(&d * 2 + &k); /* total number of knots   */
%let nb  = %eval(&d + 1 + &k); /* number of cols in basis */

proc transpose data=d out=k(drop=_name_) prefix=Knot;
run;

proc print; format k: 20.16;
run;

data b(keep=x:);
if _n_ = 1 then set k; /* read knots from transreg */
array k[&nkn] knot1-knot&nkn;        /* knots      */
array b[&nb] x_0 - x_%eval(&nb - 1); /* basis      */
array w[%eval(2 * &d)];              /* work       */
set x;
do i = 1 to &nb; b[i] = 0; end;

* find the index of first knot greater than current data value;
do ki = 1 to &nkn while(k[ki] le x); end;
kki = ki - &d - 1;

* make the basis;
b[1 + kki] = 1;
do j = 1 to &d;
w[&d + j] = k[ki + j - 1] - x;
w[j] = x - k[ki - j];
s = 0;
do i = 1 to j;
t = w[&d + i] + w[j + 1 - i];
if t ne 0.0 then t = b[i + kki] / t;
b[i + kki] = s + w[&d + i] * t;
s = w[j + 1 - i] * t;
end;
b[j + 1 + kki] = s;
end;
run;

proc compare data=y(keep=x:) compare=b
criterion=1e-12 note nosummary;
title3 "should be no differences";
run;

title 'Linear and Nonlinear Regression Functions';

* Generate an Artificial Nonlinear Scatter Plot;
data a;
do i = 1 to 500;
x = i / 2.5;
y = -((x/50)-1.5)**2 + sin(x/8) + sqrt(x)/5 + 2*log(x) + cos(x);
x = x / 21;
if y > 2 then output;
end;
run;

ods select fitplot(persist);

title2 'Linear Regression';

proc transreg data=a;
model identity(y)=identity(x);
run;

title2 'A Monotone Regression Function';

proc transreg data=a;
model identity(y)=mspline(x / nknots=9);
run;

title2 'A Nonlinear Regression Function';

proc transreg data=a;
model identity(y)=spline(x / nknots=9);
run;

title2 'A Nonlinear Regression Function, 100 Knots';

proc transreg data=a;
model identity(y)=spline(x / nknots=100);
run;

ods select all;

title 'Separate Curves, Separate Intercepts';

data a;
do x = -2 to 3 by 0.025;
g = 1;
y = 8*(x*x + 2*cos(x*6)) + 15*normal(7654321);
output;
g = 2;
y = 4*(-x*x + 4*sin(x*4)) - 40 + 15*normal(7654321);
output;
end;
run;

ods select fitplot(persist);

title 'Parallel Lines, Separate Intercepts';

proc transreg data=a solve;
model identity(y)=class(g) identity(x);
run;

title 'Parallel Monotone Curves, Separate Intercepts';

proc transreg data=a;
model identity(y)=class(g) mspline(x / knots=-1.5 to 2.5 by 0.5);
run;

title 'Parallel Curves, Separate Intercepts';

proc transreg data=a solve;
model identity(y)=class(g) spline(x / knots=-1.5 to 2.5 by 0.5);
run;

title 'Separate Slopes, Same Intercept';

proc transreg data=a;
model identity(y)=class(g / zero=none) * identity(x);
run;

title 'Separate Monotone Curves, Same Intercept';

proc transreg data=a;
model identity(y) = class(g / zero=none) *
mspline(x / knots=-1.5 to 2.5 by 0.5);
run;

title 'Separate Curves, Same Intercept';

proc transreg data=a solve;
model identity(y) = class(g / zero=none) *
spline(x / knots=-1.5 to 2.5 by 0.5);
run;

title 'Separate Slopes, Separate Intercepts';

proc transreg data=a;
model identity(y) = class(g / zero=none) | identity(x);
run;

title 'Separate Monotone Curves, Separate Intercepts';

proc transreg data=a;
model identity(y) = class(g / zero=none) |
mspline(x / knots=-1.5 to 2.5 by 0.5);
run;

title 'Separate Curves, Separate Intercepts';

proc transreg data=a solve;
model identity(y) = class(g / zero=none) |
spline(x / knots=-1.5 to 2.5 by 0.5);
run;
ods select all;

```