 /* Resources: Tukey-Type Data Smoothing */

 /****************************************************************/
 /*          S A S   S A M P L E   L I B R A R Y                 */
 /*                                                              */
 /*    NAME: TUKEY                                               */
 /*   TITLE: Tukey-Type Data Smoothing                           */
 /* PRODUCT: IML                                                 */
 /*  SYSTEM: ALL                                                 */
 /*    KEYS: SSTAT TIME                                          */
 /*   PROCS: IML PLOT                                            */
 /*    DATA:                                                     */
 /*                                                              */
 /* SUPPORT: RHD                         UPDATE:                 */
 /*     REF:                                                     */
 /*    MISC: CONVERTED FROM MATRIX TO IML USING MATIML           */
 /****************************************************************/
*LISTING PAGE LAYOUT - 132 COLUMNS WIDE, 60 LINES PER PAGE;
OPTIONS LINESIZE=132 PAGESIZE=60;
******************************************************************
*  MACRO COMMANDS FOR TUKEY TYPE SMOOTHING                       *
*      BY DON HENDERSON USDA-ARS                                 *
*                                                                *
*  PRESENTED AT SAS USERS GROUP INTERNATIONAL-THIRD ANNUAL       *
*  CONFERENCE AS A PROC MATRIX PROGRAM.    PROC MATIML WAS USED  *
*  TO CONVERT IT TO THIS PROGRAM, WHICH USES PROC IML INSTEAD.   *
******************************************************************
*MACROS PICKMED, MED3VARJ, INDATA, MED3, THREE_R, SPLIT, HAN,    *
*OUTDATA, AND PLOTTER ALLOW THE USER TO PERFORM MOST OF THE      *
*DATA SMOOTHING TECHNIQUES PRESENTED BY JOHN TUKEY IN HIS BOOK   *
*EXPLORATORY DATA ANALYSIS.  THE RESULTS ARE STORED IN TWO SAS   *
*DATA SETS.  ONE CONTAINS THE TREND (SMOOTH) AND THE OTHER       *
*CONTAINS THE RESIDUALS.                                         *
*                                                                *
*TWO OF THE MACROS ARE FOR INTERNAL USE ONLY, I.E., USED IN THE  *
*OTHER MACROS OR FOR USE IN BUILDING OTHER SMOOTHING COMPONENTS. *
*ONE MACRO PULLS IN THE DATA SET, ONE OUTPUTS THE TREND AND THE  *
*RESIDUALS INTO DATA SETS, ONE PLOTS THE RESULTS, AND THE        *
*REMAINING FOUR EACH PERFORM ONE OF THE SMOOTHING COMPONENTS.    *
*                                                                *
*THE FOLLOWING STATEMENTS ARE NECESSARY TO SMOOTH A SET OF DATA: *
*                                                                *
*     MACRO INVAR  A  %                                          *
*     MACRO IND  B  %                                            *
*     MACRO XVAR  C  %                                           *
*     MACRO SMOOTH  D  %                                         *
*     MACRO RESID  E  %                                          *
*     INDATA                                                     *
*     F                                                          *
*     OUTDATA                                                    *
*     PLOTTER                                                    *
*                                                                *
*    WHERE A IS THE LIST OF VARIABLES TO BE SMOOTHED             *
*          B IS THE NAME OF THE INPUT DATA SET                   *
*          C IS THE X VARIABLE FOR PLOTTING(ONLY IF PLOTTER USED)*
*          D IS THE OUTPUT DATA SET OF SMOOTHED VALUES           *
*          E IS THE OUTPUT DATA SET OF RESIDUALS                 *
*          F IS ANY COMBINATION OF MED3, THREE_R, SPLIT, AND HAN *
*                                                                *
******************************************************************

*INTERNAL MACRO TO FIND THE MEDIAN OF 3 VARIABLES;

*SETS MED TO THE MIDDLE VALUE OF THE THREE SCALARS A1, A2 AND A3.;
*THE CALLER ASSIGNS A1, A2, A3 FIRST AND READS MED AFTERWARDS.;
MACRO PICKMED;
   IF (A1>=A2) THEN DO;
      IF (A2>=A3) THEN MED=A2;
      ELSE IF (A1<A3) THEN MED=A1;
      ELSE MED=A3;
      END;
   ELSE DO;
      IF (A3>=A2) THEN MED=A2;
      ELSE IF (A3>A1) THEN MED=A3;
      ELSE MED=A1;
      END;
%;


*INTERNAL MACRO TO SMOOTH USING MEDIANS OF 3. ;
*COLUMN (VARIABLE) J ONLY;

*MEDIANS-OF-3 SMOOTH OF COLUMN J. READS INPUT AND WRITES SMOOTH.;
*INTERIOR ROWS TAKE THE MEDIAN OF THE ROW AND ITS TWO NEIGHBOURS.;
*EACH END ROW TAKES THE MEDIAN OF THE OBSERVED END VALUE, THE;
*SMOOTHED NEIGHBOUR, AND A VALUE BUILT FROM THE TWO NEAREST;
*SMOOTHED ROWS. A COLUMN WITH FEWER THAN 3 ROWS IS LEFT AS IS,;
*SINCE THE END-ROW STEP WOULD OTHERWISE INDEX A ROW THAT IS ABSENT.;
MACRO MED3VARJ;
  IF (OBS>=3) THEN DO;
     DO I=2 TO OBS-1;
        A1=INPUT[I-1,J];
        A2=INPUT[I,J];
        A3=INPUT[I+1,J];
        PICKMED;
        SMOOTH[I,J]=MED;
        END;
     * FIRST ROW - USES SMOOTHED ROWS 2 AND 3;
     A1=INPUT[1,J];
     A2=SMOOTH[2,J];
     A3=SMOOTH[2,J]-2*(SMOOTH[3,J]-SMOOTH[2,J]);
     PICKMED;
     SMOOTH[1,J]=MED;
     * LAST ROW - USES SMOOTHED ROWS OBS-1 AND OBS-2;
     A1=INPUT[OBS,J];
     A2=SMOOTH[OBS-1,J];
     A3=SMOOTH[OBS-1,J]-2*(SMOOTH[OBS-2,J]-SMOOTH[OBS-1,J]);
     PICKMED;
     SMOOTH[OBS,J]=MED;
     END;
%;


*INPUT THE DATA INTO PROC IML. BEGINS SMOOTHING SEQUENCE;

*STARTS PROC IML AND READS THE KEPT VARIABLES INTO A MATRIX.;
*SMOOTH AND INPUT BOTH START AS THAT MATRIX. OBS AND VARS HOLD;
*ITS ROW AND COLUMN COUNTS, VARID HOLDS THE COLUMN NAMES.;
MACRO INDATA;
  PROC IML;
  USE IND (KEEP=INVAR);
  READ ALL INTO SMOOTH [COLNAME=VARID];
  OBS=NROW(SMOOTH);
  VARS=NCOL(SMOOTH);
  INPUT=SMOOTH;
%;


*REPEATED MEDIANS OF 3 SMOOTH, UNTIL NO CHANGE;
*BY FURTHER MEDIAN OF 3 SMOOTHING;

*FOR EACH COLUMN, REPEATS THE MEDIANS-OF-3 PASS UNTIL A PASS;
*CHANGES NOTHING. INPUT IS REFRESHED FROM SMOOTH BEFORE A REPEAT.;
MACRO THREE_R;
  DO J=1 TO VARS;
     MED3VARJ;
     IF ANY(SMOOTH[,J]^=INPUT[,J]) THEN DO;
        * STEP THE INDEX BACK SO THE LOOP REVISITS THIS COLUMN;
        INPUT[,J]=SMOOTH[,J];
        J=J-1;
        END;
     END;
%;


*SPLIT THE PEAKS AND VALLEYS OF TWO POINTS;
*USING THE END-VALUE RULE;

*HANDLES TWO-POINT PEAKS AND VALLEYS. WHERE ROWS I AND I+1 ARE;
*EQUAL AND THE STEP INTO THE PAIR HAS THE OPPOSITE SIGN TO THE;
*STEP OUT OF IT, EACH ROW OF THE PAIR IS REPLACED BY A MEDIAN OF;
*ITSELF, ITS OUTER NEIGHBOUR, AND A VALUE BUILT FROM THE TWO ROWS;
*ON THAT SIDE. THE REPEATED MEDIANS PASS IS THEN RUN AGAIN.;
MACRO SPLIT;
  DO J=1 TO VARS;
     DO I=3 TO OBS-3;
        IF ( INPUT[I,J]=INPUT[I+1,J]
           & SIGN(INPUT[I,J]-INPUT[I-1,J])^=0
           & SIGN(INPUT[I,J]-INPUT[I-1,J])=-
                SIGN(INPUT[I+2,J]-INPUT[I+1,J]) ) THEN DO;
           * LEFT MEMBER OF THE PAIR, USING ROWS I-1 AND I-2;
           A1=INPUT[I,J];
           A2=INPUT[I-1,J];
           A3=INPUT[I-1,J]-2*(INPUT[I-2,J]-INPUT[I-1,J]);
           PICKMED;
           SMOOTH[I,J]=MED;
           * RIGHT MEMBER OF THE PAIR, USING ROWS I+2 AND I+3;
           A1=INPUT[I+1,J];
           A2=INPUT[I+2,J];
           A3=INPUT[I+2,J]-2*(INPUT[I+3,J]-INPUT[I+2,J]);
           PICKMED;
           SMOOTH[I+1,J]=MED;
           END;
        END;
     END;
  INPUT=SMOOTH;
  THREE_R;
%;


*MEDIANS OF 3 SMOOTH. ALL VARIABLES;

*ONE MEDIANS-OF-3 PASS OVER EVERY COLUMN. INPUT IS THEN SET TO;
*SMOOTH SO THE NEXT COMPONENT STARTS FROM THIS RESULT.;
MACRO MED3;
  DO J=1 TO VARS;
     MED3VARJ;
  END;
  INPUT=SMOOTH;
%;


*A 1/4,1/2,1/4 ARITHMETIC SMOOTH. END VALUES COPIED ON;

*WEIGHTED AVERAGE OF EACH INTERIOR ROW WITH ITS TWO NEIGHBOURS,;
*WEIGHTS 1/4, 1/2, 1/4. ROWS 1 AND OBS OF SMOOTH ARE NOT ASSIGNED.;
*INPUT IS SET TO SMOOTH AT THE END.;
MACRO HAN;
  DO J=1 TO VARS;
     DO I=2 TO OBS-1;
        SMOOTH[I,J]=(INPUT[I,J]+(INPUT[I-1,J]+INPUT[I+1,J])*0.5)*0.5;
     END;
  END;
  INPUT=SMOOTH;
%;


*OUTPUT SMOOTH AND RESIDUALS INTO SAS DATA SETS.;
*ENDS SMOOTHING SEQUENCE;

*WRITES THE SMOOTH AND THE ROUGH (REREAD DATA MINUS SMOOTH) OUT;
*AS DATA SETS. THE ROW LABEL COLUMN OF EACH IS RENAMED TO ROW.;
MACRO OUTDATA;
  FREE INPUT;
  * ROW LABELS - PRESUMABLY ROW1 UP TO ROW<NROW>, BUILT WITH THE;
  * IML INDEX CREATION OPERATOR ON CHARACTER OPERANDS. CONFIRM;
  _TMP_ROW = 'ROW1    ' : COMPRESS('ROW'+CHAR(NROW(SMOOTH)));
  CREATE SMOOTH ( RENAME=(_TMP_ROW=ROW  ))
  FROM SMOOTH [ROWNAME=_TMP_ROW COLNAME=VARID ];
  APPEND FROM SMOOTH [ROWNAME=_TMP_ROW];
  * REREAD THE KEPT VARIABLES AND SUBTRACT THE SMOOTH FROM THEM;
  USE IND (KEEP=INVAR);
  READ ALL INTO ROUGH ;
  ROUGH=ROUGH-SMOOTH;
  _TMP_ROW = 'ROW1    ' : COMPRESS('ROW'+CHAR(NROW(ROUGH)));
  CREATE RESID ( RENAME=(_TMP_ROW=ROW  ))
  FROM ROUGH [ROWNAME=_TMP_ROW COLNAME=VARID ];
  APPEND FROM ROUGH [ROWNAME=_TMP_ROW];
%;


*PLOT THE SMOOTH AND THE ROUGH VERSUS THE SPECIFIED X VARIABLE;
MACRO PLOTTER
 * ADD THE X VARIABLE TO THE SMOOTH DATA SET. THE MERGE HAS NO BY;
 * STATEMENT, SO OBSERVATIONS ARE PAIRED IN ORDER;
 DATA SMOOTH;
     MERGE SMOOTH IND(KEEP=XVAR);
 RUN;
 * PLOT EVERY SMOOTHED VARIABLE AGAINST THE X VARIABLE;
 PROC PLOT;
     PLOT (INVAR)*(XVAR);
     TITLE10 'PLOT OF THE SMOOTH';
 RUN;
 * SAME TWO STEPS FOR THE RESIDUALS DATA SET;
 DATA RESID;
    MERGE RESID IND(KEEP=XVAR);
 RUN;
 PROC PLOT;
   PLOT (INVAR)*(XVAR);
   TITLE10 'PLOT OF THE ROUGH';
 RUN;
%

*NOTE(REVIEW) - ONLY DEFINITIONS PRECEDE THIS STATEMENT, SO NO;
*PROCEDURE APPEARS TO BE ACTIVE HERE. CONFIRM IT IS STILL NEEDED.;
QUIT;


DATA A;
   * WEEKLY SERIES, ONE RECORD PER LINE IN THE ORDER WEEK COUNT TEMP.;
   * THE WRAPPED LISTING THIS WAS TAKEN FROM HAD BEEN CUT AT COLUMN;
   * 72, WHICH DROPPED THE WEEK NUMBERS 13, 39 AND 46 AND CUT 28 TO;
   * 2, SHIFTING EVERY LATER FIELD. THOSE FOUR ARE RESTORED BELOW.;
   * SOME VALUES ALSO APPEAR TO HAVE LOST TRAILING DIGITS AT THE;
   * SAME MARGIN AND ARE KEPT AS FOUND - COUNT FOR WEEKS 9, 20, 24,;
   * 35, 42 AND 49, TEMP FOR WEEKS 38 AND 45.;
   INPUT WEEK COUNT TEMP @@;
CARDS;
1 1.810734 76.21045
2 0.4555256 76.5
3 0.8021978 76.71429
4 0.8076923 77.42857
5 0.624 78.356
6 0.4312668 77
7 1.19407 78.64286
8 5.943396 78.42857
9 4.3 80.14286
10 1.454545 79.71925
11 1.498652 81.42857
12 8.638814 82.07143
13 4.967655 82.21429
14 2.838275 82.64286
15 0.425876 80.92857
16 2.177898 82.42857
17 5.749326 81.71429
18 6.363881 81.57143
19 1.822102 81.42857
20 2.2371 81.57143
21 2.083558 81.71429
22 2.716981 82.14286
23 0.9380054 82
24 1. 82.42857
25 3.115566 82.8125
26 2.619497 81.33333
27 2.601078 81.35714
28 1.242588 82.07143
29 3.668464 82.42857
30 3.967655 81.78571
31 1.490617 81.71582
32 0.2318059 80.21429
33 1.177898 81.07143
34 1.681941 80.07143
35 1.881 80.14286
36 0.09433962 80.07143
37 0.5795148 79.85714
38 1.091644 78.857
39 1.040431 78.07143
40 0.0754717 77.64286
41 0.1863208 77.5625
42 0.431266 77.14286
43 0.3113208 77.58333
44 0.1563342 77.28571
45 0.03773585 76.07
46 0.1320755 75.85714
47 0.05390836 74.71429
48 0.0296496 75.21429
49 0.029 77.35714
50 0.1644205 77.28571
51 0.1455526 76.85714
52 0.05660377 75.5
;
PROC PLOT DATA=A;
   * BOTH RAW SERIES AGAINST WEEK, BEFORE ANY SMOOTHING;
   PLOT (TEMP COUNT)*WEEK;
   TITLE10 'PLOT OF THE RAW DATA';
RUN;

*SETTINGS FOR THIS RUN. INPUT DATA SET A, VARIABLES TEMP AND COUNT,;
*X VARIABLE WEEK, SMOOTHED VALUES TO S, RESIDUALS TO R.;
*THE LAST LINE READS THE DATA, THEN APPLIES REPEATED MEDIANS OF 3,;
*PEAK AND VALLEY SPLITTING TWICE, THE 1/4 1/2 1/4 AVERAGE AND ONE;
*MORE MEDIANS OF 3 PASS, THEN WRITES THE DATA SETS AND PLOTS THEM.;
MACRO IND A %
MACRO INVAR TEMP COUNT %
MACRO XVAR WEEK %
MACRO SMOOTH S %
MACRO RESID R %
INDATA THREE_R SPLIT SPLIT HAN MED3 OUTDATA PLOTTER