Examples of Using the IPF Function
/****************************************************************/
/* S A S S A M P L E L I B R A R Y */
/* */
/* NAME: IPF2 */
/* TITLE: Examples of Using the IPF Function */
/* PRODUCT: IML */
/* SYSTEM: ALL */
/* KEYS: STATAPP CAT TABS */
/* PROCS: IML FREQ CATMOD */
/* DATA: */
/* */
/* SUPPORT: WMS UPDATE: */
/* REF: TECHNICAL REPORT TS-131 */
/* MISC: REQUIRES SAS/STAT TO VERIFY IPF. */
/* */
/****************************************************************/
********** EXAMPLE 1 ***********;
*======================================================================;
* STEP 1: INPUT THE DATA ;
*======================================================================;
/*---TABLE OF A*B---*/
data ab; input a b wt @@; cards;
1 1 8 1 2 2 2 1 4 2 2 4
;
proc freq; weight wt; table a*b / list; run;
/*---TABLE OF C*B---*/
data cb; input c b wt @@; cards;
1 1 6 1 2 0 2 1 6 2 2 6
;
proc freq; weight wt; table c*b / list; run;
*======================================================================;
* STEP 2: USE IML TO CREATE THE "ORIGINAL" TABLE ;
*======================================================================;
proc iml;
/*----COMPUTE JOINT DISTRIBUTION FOR A BY B----*/
use ab; read all into abyb;
ab = shape( abyb[,3], 2, 2);
/*----COMPUTE JOINT DISTRIBUTION FOR C BY B----*/
use cb; read all into cbyb;
cb = shape( cbyb[,3], 2, 2);
/*----COMBINE THE 2 JOINT DISTRIBUTIONS INTO A 3-WAY TABLE----*/
i=1; tab = shape(0, 1 , 8);
do a=1 to 2; do b=1 to 2; do c=1 to 2;
tab[i] = min( ab[a,b], cb[c,b] );
ab[a,b] = ab[a,b] - tab[i];
cb[c,b] = cb[c,b] - tab[i];
i = i + 1;
end; end; end;
/*---SET UP THE VARIABLE PROFILES AND THE VARIABLE NAMES---*/
r1 = { 1 1 1 1 2 2 2 2 };
r2 = { 1 1 2 2 1 1 2 2 };
r3 = { 1 2 1 2 1 2 1 2 };
cname = { a b c wt };
matrix = r1 // r2 // r3 // tab;
matrix=matrix`;
/*---OUTPUT TO A SAS DATA SET---*/
create out from matrix[colname=cname];
append from matrix;
quit;
*======================================================================;
* STEP 3: VERIFY THAT "ORIGINAL" TABLE HAS CORRECT 2-WAY MARGINS ;
*======================================================================;
proc print data=out;
proc freq data=out; weight wt; table a*b c*b / list; run;
*======================================================================;
* STEP 4: USE IML TO DO IPF FOR MODEL (AB, CB) ;
*======================================================================;
proc iml;
use out;
read all into matrix;
/* var3 var2 var1 */
/* ------ ---- ------ */
/* C B A */
table = matrix[,4];
dim = { 2 2 2 };
config = { 1 3 , 2 2 };
call ipf(fit, status, dim, table, config);
matrix[,4] = fit;
cname = { a b c wt };
create fit from matrix[colname=cname];
append from matrix;
quit;
*======================================================================;
* STEP 5: VERIFY THAT THE "FITTED" TABLE HAS CORRECT 2-WAY MARGINS. ;
* PROJECT THE FITTED TABLE ONTO THE A*C PLANE. ;
*======================================================================;
proc print data=fit;
proc freq data=fit; weight wt; table a*b c*b a*c / list; run;
********** EXAMPLE 2 ***********;
*======================================================================;
* STEP 1: INPUT THE DATA ;
*======================================================================;
data t374;
do sex=1 to 2; do age=1 to 3; do region=1 to 2; do school=1 to 4;
input wt @; output; end; end; end; end; cards;
631571 4381454 7557356 915307 1719046 5266179 5718852 153669
1176623 4174797 5384310 1055949 2386652 4297528 3050642 316853
2376214 1807879 1187537 501741 5378665 1862132 376084 68963
664308 2843925 9180042 1274867 2536078 4246961 5886728 232464
3825831 4481894 6873450 1070910 7181386 4653138 3089527 241507
7432953 1888644 1312793 275948 13089113 1071037 207498 24283
;
/*---A TABLE CONTAINING 1 OR MORE OLD VARS, & 1 OR MORE NEW VARS---*/
data t375;
do party=1 to 2; do school=1 to 4; input wt @; output; end; end; cards;
0 2290478 4580957 1367696
48398440 38685090 45243862 4764765
;
data t376;
do party = 1 to 2; do sex=1 to 2; input wt @; output; end; end; cards;
6632500 1606631
55113503 81978654
;
data t377;
do party = 1 to 2; do age=1 to 3; input wt @; output; end; end; cards;
1623109 4663348 1952674
51585698 48597649 36908810
;
/*---REORDER THE INITIAL DATA IN A DATASET CALLED INIT---*/
proc freq data=t374;
table school*sex*age*region / noprint out=init; weight wt; run;
*======================================================================;
* STEP 2: RUN IML TO DO ITERATIVE PROPORTIONAL FITTING ;
*======================================================================;
proc iml;
/*----COMPUTE MARGINALS FOR SCHOOL (PARTY MEMBERS ONLY)----*/
use t375; read all into t375;
schoolp = shape( t375[,3], 2, 4);
schpy = schoolp[1,];
/*----COMPUTE MARGINALS FOR SEX (PARTY MEMBERS ONLY)----*/
use t376; read all into t376;
sexp = shape( t376[,3], 2, 2);
sexpy = sexp[1,];
/*----COMPUTE MARGINALS FOR AGE (PARTY MEMBERS ONLY)----*/
use t377; read all into t377;
agep = shape( t377[,3], 2, 3);
agepy = agep[1,];
/*----COMBINE THE 3 MARGINALS INTO A 3-WAY TABLE----*/
i=1; tab = shape(0, 1 , 24);
do school=1 to 4; do sex=1 to 2; do age=1 to 3;
a = min( schpy[school], sexpy[sex] );
a = min( a, agepy[age] );
tab[i] = a; i = i + 1;
schpy[school] = schpy[school] - a;
sexpy[sex] = sexpy[sex] - a;
agepy[age] = agepy[age] - a;
end; end; end;
/*---INTRODUCE REGION, CREATING A 4-WAY TABLE---*/
table = tab` @ { .5 , .5 };
/*---READ IN 1959 INITIAL 4-WAY TABLE, & ADJUST FOR NEW MARGINS---*/
use init; read all into mat;
init = mat[,5];
dim = { 2 3 2 4 };
/* var4 var3 var2 var1 */
/* ---- ------ ---- ------ */
cname = { school sex age region count };
config = { 2 3 4 } ;
call ipf(fit, status, dim, table, config, init);
/*---OUTPUT THE PROFILES AND THE FITTED FREQUENCIES---*/
mat[,5] = fit;
mat = mat[,1:5];
create party from mat[colname=cname];
append from mat;
quit;
*======================================================================;
* STEP 3: PRINT THE RESULTS FOR REGION=1 (URBAN) ONLY ;
*======================================================================;
options ls=71;
data; set party; if region=1; output; drop region; run;
proc print; run;
/*---GET THE LOG ODDS RATIOS FOR THE ORIGINAL FREQUENCIES---*/
proc sort data=t374;
by sex region age school;
run;
data party2; set t374; run;
proc catmod data=party2;
by sex region;
weight wt;
response 1 -1 0 0 -1 1 0 0 0 0 0 0 ,
1 -1 0 0 0 0 0 0 -1 1 0 0 ,
0 1 -1 0 0 -1 1 0 0 0 0 0 ,
0 1 -1 0 0 0 0 0 0 -1 1 0 ,
0 0 1 -1 0 0 -1 1 0 0 0 0 ,
0 0 1 -1 0 0 0 0 0 0 -1 1 log / out=outest;
model age*school = / noprofile nodesign noparm;
run;
data; set outest; keep sex region _pred_;
options ls=71;
proc print; run;
*======================================================================;
* TO VERIFY THAT IPF IS PRESERVING INTERACTIONS TO THE ;
* EXTENT POSSIBLE WITH ZERO FREQUENCIES, USE THE CATMOD ;
* PROCEDURE TO COMPUTE 6 LOG RATIOS FOR EACH AGE*SCHOOL ;
* TABLE WITHIN THE 4 CATEGORIES OF SEX*REGION. ;
*======================================================================;
/*---GET THE LOG ODDS RATIOS FOR THE SAS IPF ESTIMATES---*/
proc sort data=party;
by sex region age school;
run;
data party2; set party; if count=0 then count=1e-20;
proc catmod data=party2;
by sex region;
weight count;
response 1 -1 0 0 -1 1 0 0 0 0 0 0 ,
1 -1 0 0 0 0 0 0 -1 1 0 0 ,
0 1 -1 0 0 -1 1 0 0 0 0 0 ,
0 1 -1 0 0 0 0 0 0 -1 1 0 ,
0 0 1 -1 0 0 -1 1 0 0 0 0 ,
0 0 1 -1 0 0 0 0 0 0 -1 1 log / out=outest;
model age*school = / noprofile nodesign noparm;
run;
data; set outest; keep sex region _pred_;
proc print; run;
**********************************************************************;
/*---ENTER THE BFH ESTIMATES -- ONLY REGION 1 ESTIMATES AVAILABLE---*/
data bfhp; do sex=1 to 2; do age=1 to 3; do school=1 to 4;
input wt @; output; end; end; end; cards;
1e-20 263941 912765 156054
1e-20 962909 2124020 601249
1e-20 688363 589989 333213
1e-20 37281 206159 46910
1e-20 214929 592609 167632
1e-20 123058 155413 62639
;
proc print; run;
/*---GET THE LOG ODDS RATIOS FOR THE BFH ESTIMATES---*/
proc catmod data=bfhp;
by sex;
weight wt;
response 1 -1 0 0 -1 1 0 0 0 0 0 0 ,
1 -1 0 0 0 0 0 0 -1 1 0 0 ,
0 1 -1 0 0 -1 1 0 0 0 0 0 ,
0 1 -1 0 0 0 0 0 0 -1 1 0 ,
0 0 1 -1 0 0 -1 1 0 0 0 0 ,
0 0 1 -1 0 0 0 0 0 0 -1 1 log / out=outest;
model age*school = / noprofile nodesign noparm;
run;
data; set outest; keep sex _pred_;
proc print; run;