Automatic Model Selection in Contingency Tables
/****************************************************************/
/* S A S S A M P L E L I B R A R Y */
/* */
/* NAME: LOGLIN */
/* TITLE: Automatic Model Selection in Contingency Tables */
/* PRODUCT: IML */
/* SYSTEM: ALL */
/* KEYS: SSTAT MATRIX */
/* PROCS: IML */
/* DATA: */
/* */
/* SUPPORT: LWB UPDATE: */
/* REF: REFERENCES: */
/* GOODMAN, L.A. (1971). The Analysis of */
/* Multidimensional Contingency Tables: Stepwise */
/* Procedures and Direct Estimation Methods for Model */
/* Building for Multiple Classification. */
/* Technometrics, 13, 33-61 */
/* LIN, S.P. (1982). Algorithm AS 185. Automatic Model */
/* Selection in Contingency Tables. */
/* APPLIED STATISTICS, 31, 317-326 */
/* MISC: CONVERTED FROM MATRIX TO IML USING MATIML */
/* */
/****************************************************************/
PROC IML;
RESET AUTONAME FW=8;
TITLE1 'AUTOMATIC MODEL SELECTION IN CONTINGENCY TABLES';
TITLE2 'BACKWARD ELIMINATION PROCEDURE';
*--------------------------------------------------------------;
*-----FIRST DEFINE IML MODULES TO BE USED BY MAIN PROGRAM------;
*--------------------------------------------------------------;
*-----ROUTINE TO PERFORM ITERATIVE BACKWARD ELIMINATION--------;
START BWELM;
DO WHILE (IM^=0);
NOIT=NOIT+1;
PP=0;
IM=0;
PRINT " ITERATION NO:";
PRINT NOIT;
IF ( NOIT=1) THEN IF ( INIT=1)
THEN PRINT " INITIAL FIT GIVEN BY USER IS:";
ELSE PRINT " INITIAL FIT SELECTED BY THE PROGRAM IS:";
ELSE PRINT "BEST SIMPLIFIED MODEL FROM THE PREVIOUS ITERATION IS:";
STAT=LRI || DFI || PI;
RR=R1;
CONFIG=CONFGI;
RUN CONWRT;
PRINT " SIMPLIFIED MODELS IN THIS ITERATION ARE:";
DO II=1 TO NCONI;
RUN SUBMOD;
CALL IPF(FIT,STATUS,DIM,TABLE,CONFIG);
RUN LRCHSQ;
RUN DEGREF;
P=1- PROBCHI(LR,DF);
LRD=LR-LRI;
DFD=DF-DFI;
PD=1- PROBCHI(LRD, DFD);
RR=R2;
STAT=(LR || DF || P) // (LRD || DFD || PD);
RUN CONWRT;
IF ( P>=AP1 & PD >=AP3) THEN IF ( PD>=PP) THEN DO;
RUN COPYB;
PP=PD;
IM=IM+1;
END;
END;
IF ( IM^=0) THEN
RUN COPYT;
END;
STAT=LRI || DFI || PI;
RR=R1;
PRINT
" ALL SIMPLIFIED MODELS IN THE ABOVE ITERATION ARE UNACCEPTABLE";
PRINT " THE BEST FINAL FIT IS";
CONFIG=CONFGI;
RUN CONWRT;
ABORT;
FINISH;
*--ROUTINE TO OBTAIN TOTAL NUMBER OF K-SUBSETS OF A N-SET----;
START NCK;
M=1;
DO KI=1 TO K;
M=M#(N-KI+1)/KI;
END;
FINISH;
* ROUTINE TO GET NEXT K-SUBSET OF AN N-SET, IN LEXICOGRAPHIC ORDER;
START NXKSB;
IF ( MM=1) THEN DO;
H=K;
C=0;
END;
ELSE DO;
H=1;
DO WHILE ( A[K+1-H,1]=N+1-H);
H=H+1;
END;
C= A[K+1-H,1];
END;
DO J=1 TO H;
A[K+J-H,1]=C+J;
END;
FINISH;
*---ROUTINE TO COMPUTE TWICE THE LIKELIHOOD RATIO STATISTIC--;
START LRCHSQ;
SUMM=0;
DO I=1 TO NTAB;
IF ( TABLE[1,I]^=0) THEN
SUMM=SUMM+ TABLE[1,I]* LOG( TABLE[1,I]/FIT[1,I]);
END;
LR=2#SUMM;
FINISH;
*-ROUTINE TO COMPUTE DEGREE OF FREEDOM OF A LOG-LINEAR MODEL-;
START DEGREF;
NP=0;
HASH=A1;
DO NN=1 TO NCON;
N= NCOL( LOC( CONFIG[,NN]));
DO K=1 TO N;
RUN NCK;
A=A0;
DO MM=1 TO M;
KK=0;
LL=1;
RUN NXKSB;
DO KI=1 TO K;
TEMP= CONFIG[ A[KI,1],NN];
KK=KK+2##(TEMP-1);
LL=LL#( DIM[1,TEMP]-1);
END;
KK= ROUND(KK);
IF ( HASH[1,KK]=0) THEN DO;
HASH[1,KK]=1;
NP=NP+LL;
END;
END;
END;
END;
DF=NTAB-NP-1;
FINISH;
*-ROUTINE TO CONSTRUCT SIMPLIFIED MODELS BY DELETING AN EFFECT-;
START SUBMOD;
N= NCOL( LOC( CONFGI[,II]));
K=N-1;
RUN NCK;
A=A0;
NLOW=0;
IF ( NCONI>1) THEN DO;
IDX= J(1,NCONI-1,0);
ICON=1:NCONI;
DO J=1 TO NCONI;
IF ( J<II) THEN IDX[1,J]= ICON[1,J];
IF ( J>II) THEN IDX[1,J-1]= ICON[1,J];
END;
CONFG1= CONFGI[,IDX];
END;
DO MM=1 TO M;
RUN NXKSB;
KS= NCOL( LOC(A));
ILOW=1;
B=A0;
B[1:KS,1]= CONFGI[ A[1:KS,1],II];
IF ( NCONI>1) THEN DO I1=1 TO NCONI-1 WHILE (ILOW=1);
I3=0;
IF ( NCOL( LOC( CONFG1[,I1]))>KS) THEN DO;
DO I2=1 TO KS;
I3=I3+( SUM( CONFG1[,I1]= B[I2,1])=1);
END;
IF ( I3=KS) THEN ILOW=0;
END;
END;
NLOW=NLOW+ILOW;
IF ( ILOW=1) THEN IF ( NLOW>1) THEN CONFG2=CONFG2 || B;
ELSE CONFG2=B;
END;
IF ( NCONI=1) THEN CONFIG=CONFG2;
ELSE IF ( NLOW>=1) THEN CONFIG=CONFG1 || CONFG2;
ELSE CONFIG=CONFG1;
NCON= NCOL(CONFIG);
FINISH;
*-----ROUTINE TO PRINT MODEL CONFIGURATION & STATISTICS------;
START CONWRT;
MODEL= J(1, NCOL(CONFIG),0);
DO I=1 TO NVAR;
MODEL[1,]= MODEL[1,]+ CONFIG[I,]#(10##(NVAR-I));
END;
PRINT MODEL;
PRINT STAT[ FORMAT=8.3][ ROWNAME=RR][ COLNAME=CC];
FINISH;
*-------------------EXIT AND COPY ROUTINES-------------------;
START COPYI;
NCONI=NCON;
CONFGI=CONFIG;
LRI=LR;
DFI=DF;
PI=P;
FINISH;
START COPYB;
NCONB=NCON;
CONFGB=CONFIG;
LRB=LR;
DFB=DF;
PB=P;
FINISH;
START COPYT;
NCONI=NCONB;
CONFGI=CONFGB;
LRI=LRB;
DFI=DFB;
PI=PB;
FINISH;
START FAULT;
PRINT " ERROR IN NVAR, DIM OR TABLE";
ABORT;
FINISH;
START SATUR;
PRINT " THE BEST FINAL FIT IS THE SATURATED MODEL";
ABORT;
FINISH;
*------------------------------------------------------------;
*--NOW THAT IML MODULES ARE DEFINED, BEGIN THE MAIN PROGRAM--;
*------------------------------------------------------------;
START MAIN;
*--------------------USER-PROVIDED INFORMATION------------;
NVAR=3;
DIM={2 2 2};
AP1=0.05;
AP2=0.10;
AP3=0.10;
INIT=1;
TABLE={156 84 84 156 107 133 31 209};
*DATA FROM BISHOP ET AL. (1975);
CONFIG={1 1 2,2 3 3,0 0 0};
*USER-SPECIFIED INITIAL FIT IF INIT=1;
*------------ERROR CHECKING AND INITILIZATION-------------;
IF ( NCOL(DIM)^=NVAR | NCOL(TABLE)^= DIM[,#]) THEN
RUN FAULT;
A0= J(NVAR,1,0);
A1= J(1,2##NVAR-1,0);
IFIT=0;
NOIT=0;
IM=1;
NTAB= DIM[,#];
R1='FIT';
R2={'FIT' 'DIFF'};
CC={'LR' 'DF' 'P'};
*-------------FIT USER-SPECIFIED INITIAL FIT--------------;
IF ( INIT=1) THEN DO;
NCON= NCOL(CONFIG);
IF ( NCON=1 & NROW(CONFIG)=NVAR) THEN DO;
LR=0;
DF=0;
P=1;
END;
ELSE DO;
CALL IPF(FIT,STATUS,DIM,TABLE,CONFIG);
RUN LRCHSQ;
RUN DEGREF;
P=1- PROBCHI(LR,DF);
END;
RUN COPYI;
RUN BWELM;
END;
*--------------SEARCH FOR A BEST INITIAL FIT---------------;
PRINT " SELECTION OF A BEST INITIAL FIT:";
DO NWAY=1 TO NVAR;
IF ( NWAY=NVAR) THEN
RUN SATUR;
N=NVAR;
K=NWAY;
RUN NCK;
NCON=M;
A=A0;
DO MM=1 TO NCON;
RUN NXKSB;
IF ( MM=1) THEN CONFIG=A;
ELSE CONFIG=CONFIG || A;
END;
CALL IPF(FIT,STATUS,DIM,TABLE,CONFIG);
RUN LRCHSQ;
RUN DEGREF;
P=1- PROBCHI(LR,DF);
STAT=LR || DF || P;
RR=R1;
RUN CONWRT;
IF ( P>=AP1) THEN DO;
IFIT=IFIT+1;
IF ( IFIT>1) THEN IF ( 1- PROBCHI(LRI-LR,DFI-DF)>AP2) THEN
RUN BWELM;
RUN COPYI;
IF ( NWAY=NVAR-1) THEN
RUN BWELM;
END;
END;
FINISH;
RUN MAIN; *------RUN MAIN MODULE-------;
title; title2;