## Examples of Using the IPF Function

``` /****************************************************************/
/*            S A S   S A M P L E   L I B R A R Y               */
/*                                                              */
/*    NAME: IPF2                                                */
/*   TITLE: Examples of Using the IPF Function                  */
/* PRODUCT: IML                                                 */
/*  SYSTEM: ALL                                                 */
/*    KEYS: STATAPP CAT TABS                                    */
/*   PROCS: IML FREQ CATMOD                                     */
/*    DATA:                                                     */
/*                                                              */
/* SUPPORT: WMS                         UPDATE:                 */
/*     REF: TECHNICAL REPORT TS-131                             */
/*    MISC: REQUIRES SAS/STAT TO VERIFY IPF.                    */
/*                                                              */
/****************************************************************/

**********  EXAMPLE 1  ***********;

*======================================================================;
*    STEP 1: INPUT THE DATA                                            ;
*======================================================================;
/*---TABLE OF A*B---*/
data ab; input a b wt @@; cards;
1 1 8  1 2 2   2 1 4  2 2 4
;
proc freq; weight wt; table a*b / list; run;

/*---TABLE OF C*B---*/
data cb; input c b wt @@; cards;
1 1 6  1 2 0   2 1 6  2 2 6
;
proc freq; weight wt; table c*b / list; run;

*======================================================================;
*    STEP 2: USE IML TO CREATE THE "ORIGINAL" TABLE                    ;
*======================================================================;
proc iml;

/*----COMPUTE JOINT DISTRIBUTION FOR A BY B----*/
use ab; read all into abyb;
ab = shape( abyb[,3], 2, 2);

/*----COMPUTE JOINT DISTRIBUTION FOR C BY B----*/
use cb; read all into cbyb;
cb = shape( cbyb[,3], 2, 2);

/*----COMBINE THE 2 JOINT DISTRIBUTIONS INTO A 3-WAY TABLE----*/
i=1;    tab = shape(0, 1 , 8);
do a=1 to 2; do b=1 to 2; do c=1 to 2;
tab[i] = min( ab[a,b], cb[c,b] );
ab[a,b] = ab[a,b] - tab[i];
cb[c,b] = cb[c,b] - tab[i];
i = i + 1;
end; end; end;

/*---SET UP THE VARIABLE PROFILES AND THE VARIABLE NAMES---*/
r1 = { 1 1 1 1 2 2 2 2 };
r2 = { 1 1 2 2 1 1 2 2 };
r3 = { 1 2 1 2 1 2 1 2 };
cname = { a b c wt };
matrix = r1 // r2 // r3 // tab;
matrix=matrix`;

/*---OUTPUT TO A SAS DATA SET---*/
create out from matrix[colname=cname];
append from matrix;
quit;

*======================================================================;
*   STEP 3: VERIFY THAT "ORIGINAL" TABLE HAS CORRECT 2-WAY MARGINS     ;
*======================================================================;
proc print data=out;
proc freq data=out; weight wt; table a*b c*b / list; run;

*======================================================================;
*    STEP 4: USE IML TO DO IPF FOR MODEL (AB, CB)                      ;
*======================================================================;
proc iml;
use out;
/*  var3    var2   var1    */
/*  ------  ----   ------  */
/*    C       B       A    */
table = matrix[,4];
dim = { 2 2 2 };
config = { 1 3 , 2 2 };
call ipf(fit, status, dim, table, config);

matrix[,4] = fit;
cname = { a b c wt };
create fit from matrix[colname=cname];
append from matrix;
quit;

*======================================================================;
*  STEP 5:  VERIFY THAT THE "FITTED" TABLE HAS CORRECT 2-WAY MARGINS.  ;
*           PROJECT THE FITTED TABLE ONTO THE A*C PLANE.               ;
*======================================================================;
proc print data=fit;
proc freq data=fit; weight wt; table a*b c*b a*c / list; run;

**********  EXAMPLE 2  ***********;

*======================================================================;
*    STEP 1: INPUT THE DATA                                            ;
*======================================================================;

data t374;
do sex=1 to 2; do age=1 to 3; do region=1 to 2; do school=1 to 4;
input wt @; output; end; end; end; end; cards;
631571  4381454  7557356   915307  1719046  5266179  5718852  153669
1176623  4174797  5384310  1055949  2386652  4297528  3050642  316853
2376214  1807879  1187537   501741  5378665  1862132   376084   68963
664308  2843925  9180042  1274867  2536078  4246961  5886728  232464
3825831  4481894  6873450  1070910  7181386  4653138  3089527  241507
7432953  1888644  1312793   275948 13089113  1071037   207498   24283
;

/*---A TABLE CONTAINING 1 OR MORE OLD VARS, & 1 OR MORE NEW VARS---*/
data t375;
do party=1 to 2; do school=1 to 4; input wt @; output; end; end; cards;
0   2290478   4580957   1367696
48398440  38685090  45243862   4764765
;

data t376;
do party = 1 to 2; do sex=1 to 2; input wt @; output; end; end; cards;
6632500   1606631
55113503  81978654
;

data t377;
do party = 1 to 2; do age=1 to 3; input wt @; output; end; end; cards;
1623109   4663348   1952674
51585698  48597649  36908810
;

/*---REORDER THE INITIAL DATA IN A DATASET CALLED INIT---*/
proc freq data=t374;
table school*sex*age*region / noprint out=init; weight wt; run;

*======================================================================;
*    STEP 2: RUN IML TO DO ITERATIVE PROPORTIONAL FITTING              ;
*======================================================================;
proc iml;

/*----COMPUTE MARGINALS FOR SCHOOL (PARTY MEMBERS ONLY)----*/
use t375; read all into t375;
schoolp = shape( t375[,3], 2, 4);
schpy = schoolp[1,];

/*----COMPUTE MARGINALS FOR SEX (PARTY MEMBERS ONLY)----*/
use t376; read all into t376;
sexp = shape( t376[,3], 2, 2);
sexpy = sexp[1,];

/*----COMPUTE MARGINALS FOR AGE (PARTY MEMBERS ONLY)----*/
use t377; read all into t377;
agep = shape( t377[,3], 2, 3);
agepy = agep[1,];

/*----COMBINE THE 3 MARGINALS INTO A 3-WAY TABLE----*/
i=1;    tab = shape(0, 1 , 24);
do school=1 to 4; do sex=1 to 2; do age=1 to 3;
a = min( schpy[school], sexpy[sex] );
a = min( a, agepy[age] );
tab[i] = a;    i = i + 1;
schpy[school] =  schpy[school] - a;
sexpy[sex]    =  sexpy[sex] - a;
agepy[age]    =  agepy[age] - a;
end; end; end;

/*---INTRODUCE REGION, CREATING A 4-WAY TABLE---*/
table = tab` @ { .5 , .5 };

use init; read all into mat;
init = mat[,5];
dim = { 2 3 2 4 };
/*           var4   var3    var2   var1    */
/*           ----   ------  ----   ------  */
cname = {    school sex     age    region   count };
config = { 2 3 4 } ;
call ipf(fit, status, dim, table, config, init);

/*---OUTPUT THE PROFILES AND THE FITTED FREQUENCIES---*/
mat[,5] = fit;
mat = mat[,1:5];
create party from mat[colname=cname];
append from mat;
quit;

*======================================================================;
*     STEP 3: PRINT THE RESULTS FOR REGION=1 (URBAN) ONLY              ;
*======================================================================;
options ls=71;
data; set party; if region=1; output; drop region; run;
proc print; run;

/*---GET THE LOG ODDS RATIOS FOR THE ORIGINAL FREQUENCIES---*/
proc sort data=t374;
by sex region age school;
run;
data party2; set t374; run;
proc catmod data=party2;
by sex region;
weight wt;
response  1 -1  0  0   -1  1  0  0    0  0  0  0 ,
1 -1  0  0    0  0  0  0   -1  1  0  0 ,
0  1 -1  0    0 -1  1  0    0  0  0  0 ,
0  1 -1  0    0  0  0  0    0 -1  1  0 ,
0  0  1 -1    0  0 -1  1    0  0  0  0 ,
0  0  1 -1    0  0  0  0    0  0 -1  1   log / out=outest;
model age*school = / noprofile nodesign noparm;
run;
data; set outest; keep sex region _pred_;
options ls=71;
proc print; run;

*======================================================================;
*             TO VERIFY THAT IPF IS PRESERVING INTERACTIONS TO THE     ;
*             EXTENT POSSIBLE WITH ZERO FREQUENCIES, USE THE CATMOD    ;
*             PROCEDURE TO COMPUTE 6 LOG RATIOS FOR EACH AGE*SCHOOL    ;
*             TABLE WITHIN THE 4 CATEGORIES OF SEX*REGION.             ;
*======================================================================;

/*---GET THE LOG ODDS RATIOS FOR THE SAS IPF ESTIMATES---*/
proc sort data=party;
by sex region age school;
run;
data party2; set party; if count=0 then count=1e-20;
proc catmod data=party2;
by sex region;
weight count;
response  1 -1  0  0   -1  1  0  0    0  0  0  0 ,
1 -1  0  0    0  0  0  0   -1  1  0  0 ,
0  1 -1  0    0 -1  1  0    0  0  0  0 ,
0  1 -1  0    0  0  0  0    0 -1  1  0 ,
0  0  1 -1    0  0 -1  1    0  0  0  0 ,
0  0  1 -1    0  0  0  0    0  0 -1  1   log / out=outest;
model age*school = / noprofile nodesign noparm;
run;
data; set outest; keep sex region _pred_;
proc print; run;

**********************************************************************;

/*---ENTER THE BFH ESTIMATES -- ONLY REGION 1 ESTIMATES AVAILABLE---*/
data bfhp; do sex=1 to 2; do age=1 to 3; do school=1 to 4;
input wt @; output; end; end; end; cards;
1e-20   263941   912765   156054
1e-20   962909  2124020   601249
1e-20   688363   589989   333213
1e-20    37281   206159    46910
1e-20   214929   592609   167632
1e-20   123058   155413    62639
;
proc print; run;

/*---GET THE LOG ODDS RATIOS FOR THE BFH ESTIMATES---*/
proc catmod data=bfhp;
by sex;
weight wt;
response  1 -1  0  0   -1  1  0  0    0  0  0  0 ,
1 -1  0  0    0  0  0  0   -1  1  0  0 ,
0  1 -1  0    0 -1  1  0    0  0  0  0 ,
0  1 -1  0    0  0  0  0    0 -1  1  0 ,
0  0  1 -1    0  0 -1  1    0  0  0  0 ,
0  0  1 -1    0  0  0  0    0  0 -1  1   log / out=outest;
model age*school = / noprofile nodesign noparm;
run;
data; set outest; keep sex _pred_;
proc print; run;

```