Fractional Factorial with Repeated Measures
/****************************************************************/
/* S A S S A M P L E L I B R A R Y */
/* */
/* NAME: GLMFRACT */
/* TITLE: Fractional Factorial with Repeated Measures */
/* PRODUCT: STAT */
/* SYSTEM: ALL */
/* KEYS: analysis of variance, repeated measures analysis */
/* PROCS: GLM SGPLOT */
/* DATA: */
/* */
/* SUPPORT: WSS UPDATE: January 2010 */
/* REF: */
/* MISC: */
/* */
/****************************************************************/
*-----------------------------------------------------------------
The data are from a simulation experiment comparing three
methods for initializing a multidimensional scaling algorithm.
The methods are OLD, NEW, and RANDOM. The dependent variable is
CPU time. Smaller values are better.
------------------------------------------------------------------;
/* User-defined format LEVEL: displays the numeric codes 1, 2, 3 as text labels. */
proc format;
   value level
      1 = 'Low'
      2 = 'Medium'
      3 = 'High'
   ;
run;
/* Build data set INIT: one row per simulation run, holding the design      */
/* factors (stim, sub, dim, error), the CPU time for each initialization    */
/* method (old, new, random), and the LOG() of each CPU time                */
/* (OldL, NewL, RandomL).                                                    */
data init;
   input stim sub dim error old new random;

   /* Explicit subscripted arrays replace the undocumented implicit-array / */
   /* DO OVER syntax. Element k of XL receives the log of element k of X.   */
   array x{3}  old  new  random;
   array xl{3} OldL NewL RandomL;
   do i = 1 to 3;
      xl{i} = log(x{i});
   end;
   /* The loop index is a work variable only; keep it out of INIT. */
   drop i;

   label stim='Number of Stimuli'
         sub='Number of Subjects'
         dim='Number of Dimensions'
         error='Error Level'
         old='Old Initialization Method'
         new='New Initialization Method'
         random='Random Initialization'
         oldl='LOG Old Initialization Method'
         newl='LOG New Initialization Method'
         randoml='LOG Random Initialization';

   /* Show the error code through the LEVEL format. */
   format error level.;
   datalines;
10 10 5 1 26 40 50
20 20 4 1 187 99 182
30 30 3 1 362 245 415
40 40 2 1 623 511 790
50 50 6 1 4252 2034 4266
10 20 2 1 17 18 22
20 30 6 1 426 205 481
30 40 5 1 750 455 936
40 50 4 1 1338 973 1853
50 10 3 1 417 306 429
10 30 4 2 109 66 109
20 40 3 2 240 171 230
30 50 2 2 446 362 539
40 10 6 2 928 352 662
50 20 5 2 1270 832 1652
10 40 6 3 218 158 359
20 50 5 3 638 322 624
30 10 4 3 222 150 241
40 20 3 3 692 352 478
50 30 2 3 687 602 1007
10 50 3 3 258 102 246
20 10 2 3 38 38 45
30 20 6 3 605 335 1000
40 30 5 3 1093 709 1253
50 40 4 3 1990 1159 2501
;
/* List the raw CPU times, identified by the four design factors.          */
/* DATA=INIT is named explicitly so the listing does not depend on which   */
/* data set happens to have been created most recently.                    */
proc print data=init label;
   id stim sub dim error;
   var random old new;
run;
*-----------------------------------------------------------------
A preliminary analysis is done to get univariate statistics and
do a residual-by-predicted plot.
------------------------------------------------------------------;
/* Main-effects fit of the three raw CPU-time responses (Type I sums of    */
/* squares). Residuals and predicted values are written to data set OUT.   */
title 'Analysis of CPU Times';
proc glm data=init;
   class stim sub dim error;
   model random old new = stim sub dim error / ss1;
   output out=out
          r=RandomR OldR NewR
          p=RandomP OldP NewP;
run;
quit;
/* Residual-by-predicted scatter plots, one per method, read from OUT. */
title2 'Plot of Residuals by Predicted Values';

/* OLD method */
proc sgplot data=out;
   scatter y=oldr x=oldp;
run;

/* NEW method */
proc sgplot data=out;
   scatter y=newr x=newp;
run;

/* RANDOM method */
proc sgplot data=out;
   scatter y=randomr x=randomp;
run;
*-----------------------------------------------------------------
The first plot shows nonlinearity and unequal variances, so we
try again with logarithms instead of raw CPU times.
------------------------------------------------------------------;
/* Same main-effects model on the logged CPU times, with the three         */
/* responses treated as a 3-level repeated factor named METHOD.            */
/* Residuals/predicted values go to OUTL; the least-squares means of each  */
/* design factor go to OSTIM, OSUB, ODIM and OERROR.                       */
title 'Analysis of LOG(CPU Times)';
proc glm data=init;
   class stim sub dim error;
   model randoml oldl newl = stim sub dim error / ss1;
   repeated method 3 / nou summary;
   output out=outl
          r=RandomR OldR NewR
          p=RandomP OldP NewP;
   lsmeans stim  / out=ostim;
   lsmeans sub   / out=osub;
   lsmeans dim   / out=odim;
   lsmeans error / out=oerror;
run;
quit;
/* Residual-by-predicted scatter plots for the LOG(CPU time) fit.          */
/* All three plots read OUTL, the OUTPUT data set of the log-scale GLM     */
/* step; the NEW and RANDOM plots previously read OUT, which holds the     */
/* residuals of the raw-scale fit.                                         */
title2 'Plot of Residuals by Predicted Values';

/* OLD method */
proc sgplot data=outl;
   scatter y=oldr x=oldp;
run;

/* NEW method */
proc sgplot data=outl;
   scatter y=newr x=newp;
run;

/* RANDOM method */
proc sgplot data=outl;
   scatter y=randomr x=randomp;
run;
/* Least-squares means plotted against each design factor in turn.         */
/* Each point is drawn as the text held in _NAME_ of the LSMEANS data set. */
title2 'Plot of Marginal Means';

/* By number of stimuli */
proc sgplot data=ostim;
   label lsmean='LOG(CPU Time)';
   scatter y=lsmean x=stim / markerchar=_name_;
run;

/* By number of subjects */
proc sgplot data=osub;
   label lsmean='LOG(CPU Time)';
   scatter y=lsmean x=sub / markerchar=_name_;
run;

/* By number of dimensions */
proc sgplot data=odim;
   label lsmean='LOG(CPU Time)';
   scatter y=lsmean x=dim / markerchar=_name_;
run;

/* By error level */
proc sgplot data=oerror;
   label lsmean='LOG(CPU Time)';
   scatter y=lsmean x=error / markerchar=_name_;
run;
*-----------------------------------------------------------------
A possible outlier is evident in the lower left corner of the
residual-by-predicted plots for RANDOM and NEW. The analysis is
repeated with the outlier removed by a WHERE statement. The
design is no longer orthogonal, so SS3 is specified in the MODEL
statement instead of SS1.
------------------------------------------------------------------;
/* Refit the log-scale repeated-measures model without the suspect run.    */
/* Input is OUTL with its RANDOM residual renamed to RESID and the other   */
/* residual/predicted columns dropped, so the OUTPUT statement can         */
/* recreate them. Only rows with RESID above -0.3 are analysed. Type III   */
/* sums of squares are requested.                                          */
title 'Analysis of LOG(CPU Times) with Outlier Removed';
proc glm data=outl(drop=oldr newr randomp oldp newp
                   rename=(randomr=resid));
   where resid > -0.3;
   class stim sub dim error;
   model randoml oldl newl = stim sub dim error / ss3;
   repeated method 3 / nou summary;
   output out=out
          r=RandomR OldR NewR
          p=RandomP OldP NewP;
   lsmeans stim  / out=ostim;
   lsmeans sub   / out=osub;
   lsmeans dim   / out=odim;
   lsmeans error / out=oerror;
run;
quit;
/* Residual-by-predicted scatter plots for the refit, read from OUT        */
/* (the OUTPUT data set of the outlier-removed GLM step).                  */
title2 'Plot of Residuals by Predicted Values';

/* OLD method */
proc sgplot data=out;
   scatter y=oldr x=oldp;
run;

/* NEW method */
proc sgplot data=out;
   scatter y=newr x=newp;
run;

/* RANDOM method */
proc sgplot data=out;
   scatter y=randomr x=randomp;
run;
/* Least-squares means from the outlier-removed fit, plotted against each  */
/* design factor. Points are drawn as the text held in _NAME_.             */
title2 'Plot of Marginal Means';

/* By number of stimuli */
proc sgplot data=ostim;
   label lsmean='LOG(CPU Time)';
   scatter y=lsmean x=stim / markerchar=_name_;
run;

/* By number of subjects */
proc sgplot data=osub;
   label lsmean='LOG(CPU Time)';
   scatter y=lsmean x=sub / markerchar=_name_;
run;

/* By number of dimensions */
proc sgplot data=odim;
   label lsmean='LOG(CPU Time)';
   scatter y=lsmean x=dim / markerchar=_name_;
run;

/* By error level */
proc sgplot data=oerror;
   label lsmean='LOG(CPU Time)';
   scatter y=lsmean x=error / markerchar=_name_;
run;