Getting Started Example for PROC PLS
/****************************************************************/
/* S A S S A M P L E L I B R A R Y */
/* */
/* NAME: PLSGS */
/* TITLE: Getting Started Example for PROC PLS */
/* PRODUCT: STAT */
/* SYSTEM: ALL */
/* KEYS: regression analysis, */
/* PROCS: PLS */
/* DATA: */
/* */
/* SUPPORT: RDT UPDATE: */
/* REF: */
/* MISC: */
/* */
/****************************************************************/
/* Getting Started Example: Predicting Biological Activity */
data Sample;
input obsnam $ v1-v27 ls ha dt @@;
datalines;
EM1 2766 2610 3306 3630 3600 3438 3213 3051 2907 2844 2796
2787 2760 2754 2670 2520 2310 2100 1917 1755 1602 1467
1353 1260 1167 1101 1017 3.0110 0.0000 0.00
EM2 1492 1419 1369 1158 958 887 905 929 920 887 800
710 617 535 451 368 296 241 190 157 128 106
89 70 65 56 50 0.0000 0.4005 0.00
EM3 2450 2379 2400 2055 1689 1355 1109 908 750 673 644
640 630 618 571 512 440 368 305 247 196 156
120 98 80 61 50 0.0000 0.0000 90.63
EM4 2751 2883 3492 3570 3282 2937 2634 2370 2187 2070 2007
1974 1950 1890 1824 1680 1527 1350 1206 1080 984 888
810 732 669 630 582 1.4820 0.1580 40.00
EM5 2652 2691 3225 3285 3033 2784 2520 2340 2235 2148 2094
2049 2007 1917 1800 1650 1464 1299 1140 1020 909 810
726 657 594 549 507 1.1160 0.4104 30.45
EM6 3993 4722 6147 6720 6531 5970 5382 4842 4470 4200 4077
4008 3948 3864 3663 3390 3090 2787 2481 2241 2028 1830
1680 1533 1440 1314 1227 3.3970 0.3032 50.82
EM7 4032 4350 5430 5763 5490 4974 4452 3990 3690 3474 3357
3300 3213 3147 3000 2772 2490 2220 1980 1779 1599 1440
1320 1200 1119 1032 957 2.4280 0.2981 70.59
EM8 4530 5190 6910 7580 7510 6930 6150 5490 4990 4670 4490
4370 4300 4210 4000 3770 3420 3060 2760 2490 2230 2060
1860 1700 1590 1490 1380 4.0240 0.1153 89.39
EM9 4077 4410 5460 5857 5607 5097 4605 4170 3864 3708 3588
3537 3480 3330 3192 2910 2610 2325 2064 1830 1638 1476
1350 1236 1122 1044 963 2.2750 0.5040 81.75
EM10 3450 3432 3969 4020 3678 3237 2814 2487 2205 2061 2001
1965 1947 1890 1776 1635 1452 1278 1128 981 867 753
663 600 552 507 468 0.9588 0.1450 101.10
EM11 4989 5301 6807 7425 7155 6525 5784 5166 4695 4380 4197
4131 4077 3972 3777 3531 3168 2835 2517 2244 2004 1809
1620 1470 1359 1266 1167 3.1900 0.2530 120.00
EM12 5340 5790 7590 8390 8310 7670 6890 6190 5700 5380 5200
5110 5040 4900 4700 4390 3970 3540 3170 2810 2490 2240
2060 1870 1700 1590 1470 4.1320 0.5691 117.70
EM13 3162 3477 4365 4650 4470 4107 3717 3432 3228 3093 3009
2964 2916 2838 2694 2490 2253 2013 1788 1599 1431 1305
1194 1077 990 927 855 2.1600 0.4360 27.59
EM14 4380 4695 6018 6510 6342 5760 5151 4596 4200 3948 3807
3720 3672 3567 3438 3171 2880 2571 2280 2046 1857 1680
1548 1413 1314 1200 1119 3.0940 0.2471 61.71
EM15 4587 4200 5040 5289 4965 4449 3939 3507 3174 2970 2850
2814 2748 2670 2529 2328 2088 1851 1641 1431 1284 1134
1020 918 840 756 714 1.6040 0.2856 108.80
EM16 4017 4725 6090 6570 6354 5895 5346 4911 4611 4422 4314
4287 4224 4110 3915 3600 3240 2913 2598 2325 2088 1917
1734 1587 1452 1356 1257 3.1620 0.7012 60.00
;
/* Fitting a PLS Model */
proc pls data=sample;
model ls ha dt = v1-v27;
run;
/* Selecting the Number of Factors by Cross Validation */
proc pls data=sample cv=split;
model ls ha dt = v1-v27;
run;
proc pls data=sample cv=split cvtest(seed=12345);
model ls ha dt = v1-v27;
run;
/* Predicting New Observations */
data newobs;
input obsnam $ v1-v27 @@;
datalines;
EM17 3933 4518 5637 6006 5721 5187 4641 4149 3789
3579 3447 3381 3327 3234 3078 2832 2571 2274
2040 1818 1629 1470 1350 1245 1134 1050 987
EM25 2904 2997 3255 3150 2922 2778 2700 2646 2571
2487 2370 2250 2127 2052 1713 1419 1200 984
795 648 525 426 351 291 240 204 162
;
data all;
set sample newobs;
run;
proc pls data=all nfac=3;
model ls ha dt = v1-v27;
output out=pred p=p_ls p_ha p_dt;
run;
proc print data=pred;
where (obsnam in ('EM17','EM25'));
var obsnam p_ls p_ha p_dt;
run;
/* ODS Graphics */
ods graphics on;
proc pls data=sample cv=split cvtest(seed=12345);
model ls ha dt = v1-v27;
run;
ods graphics off;