| Language Reference |
performs an iterative proportional fit of a contingency table
/* Return a table such that cell (i,j) has value
(sum of row i)(sum of col j)/(sum of all cells) */
start GetIndepTableFromMargins( bottom, side );
if bottom[+] ^= side[+] then do;
print "Marginal totals are not equal";
abort;
end;
table = side*bottom/side[+];
return (table);
finish;
/* Use a "greedy" algorithm to create a table whose
marginal totals match given marginal totals.
Margin1 is the vector of frequencies totaled down
each column. Margin1 means that
Variable 1 has NOT been summed over.
Margin2 is the vector of frequencies totaled across
each row. Margin2 means that Variable 2
has NOT been summed over.
After calling, use SHAPE to change the shape of
the returned argument. */
start GetGreedyTableFromMargins( Margin1, Margin2 );
/* copy arguments so they are not corrupted */
m1 = colvec(Margin1); /* colvec is in IMLMLIB */
m2 = colvec(Margin2);
if m1[+] ^= m2[+] then do;
print "Marginal totals are not equal";
abort;
end;
dim1 = nrow(m1);
dim2 = nrow(m2);
table = j(1,dim1*dim2,0);
/* give as much to cell (1,1) as possible,
then as much as remains to cell (1,2), etc,
until all the margins have been distributed */
idx = 1;
do i2 = 1 to dim2;
do i1 = 1 to dim1;
t = min(m1[i1],m2[i2]);
table[idx] = t;
idx = idx + 1;
m1[i1] = m1[i1]-t;
m2[i2] = m2[i2]-t;
end;
end;
return (table);
finish;
Mod = {0.01 15}; /* tighten stopping criterion */
Columns = {' Single' ' Married' 'Widow/Divorced'};
Rows = {'15 - 19' '20 - 24' '25 - 29' '30 - 34'
'35 - 39' '40 - 44' '45 - 49' '50 Or Over'};
/* Marital status has 3 levels. Age has 8 levels */
Dim = {3 8};
/* Use known distribution for start-up values */
IniTab = { 1306 83 0 ,
619 765 3 ,
263 1194 9 ,
173 1372 28 ,
171 1393 51 ,
159 1372 81 ,
208 1350 108 ,
1116 4100 2329 };
/* New marginal totals for age by marital status */
NewMarital = { 3988 11702 2634 };
NewAge = {1412,1402,1450,1541,1681,1532,1662,7644};
/* Create any table with these marginals */
Table = GetGreedyTableFromMargins(NewMarital, NewAge);
Table = shape(Table, nrow(IniTab), ncol(IniTab));
/* Consider all main effects */
Config = {1 2};
call ipf(Fit,Status,Dim,Table,Config,IniTab,Mod);
if Status[1] = 0 then
print 'Known Distribution (1957)',
IniTab [colname=Columns rowname=Rows format=8.0],,
'Adjusted Estimates Of Distribution (1958)',
Fit [colname=Columns rowname=Rows format=8.2];
else
print "IPF did not converge in "
(Status[3]) " iterations";
Known Distribution (1957)
INITAB
Single Married Widow/Divorced
15 - 19 1306 83 0
20 - 24 619 765 3
25 - 29 263 1194 9
30 - 34 173 1372 28
35 - 39 171 1393 51
40 - 44 159 1372 81
45 - 49 208 1350 108
50 Or Over 1116 4100 2329
Adjusted Estimates Of Distribution (1958)
FIT
Single Married Widow/Divorced
15 - 19 1325.27 86.73 0.00
20 - 24 615.56 783.39 3.05
25 - 29 253.94 1187.18 8.88
30 - 34 165.13 1348.55 27.32
35 - 39 173.41 1454.71 52.87
40 - 44 147.21 1308.12 76.67
45 - 49 202.33 1352.28 107.40
50 Or Over 1105.16 4181.04 2357.81
/* Yea Abstain Nay */
IniTab = { 61 12 60, /* North */
17 6 1, /* Border */
39 22 7 }; /* South */
NewVotes = {100 100 100};
NewSection = {100,100,100};
FIT
Yea Abstain Nay
North 20.1 10.2 69.7
Border 47.4 42.8 9.8
South 32.5 47.0 20.5
do i3 = 1 to dim3;
do i2 = 1 to dim2;
do i1 = 1 to dim1;
t = min(m1[i1],m2[i2],m3[i3]);
table[idx] = t;
idx = idx + 1;
m1[i1] = m1[i1]-t;
m2[i2] = m2[i2]-t;
m3[i3] = m3[i3]-t;
end;
end;
end;
| Father's Educational Level | ||||||
| Self- | Not HS | HS | Some | Coll | Post | |
| Religion | Esteem | Grad | Grad | Coll | Grad | Coll |
| High | 575 | 388 | 100 | 77 | 51 | |
| Catholic | ||||||
| Low | 267 | 153 | 40 | 37 | 19 | |
| High | 117 | 102 | 67 | 87 | 62 | |
| Jewish | ||||||
| Low | 48 | 35 | 18 | 12 | 13 | |
| High | 359 | 233 | 109 | 197 | 90 | |
| Protestant | ||||||
| Low | 159 | 173 | 47 | 82 | 32 | |
dim={5 2 3};
table={
/* Father's Education:
NotHSGrad HSGrad Col ColGrad PostCol
Self-
Relig Esteem */
/* Cath- Hi */ 575 388 100 77 51,
/* olic Lo */ 267 153 40 37 19,
/* Jew- Hi */ 117 102 67 87 62,
/* ish Lo */ 48 35 18 12 13,
/* Prote- Hi */ 359 233 109 197 90,
/* stant Lo */ 159 173 47 82 32
};
config = { 1 3,
2 0 };
call marg(locmar, marginal, dim, table, config);
print locmar, marginal, table;
/* Examine marginals: The name indicates the
variable(s) that are NOT summed over.
The locmar variable tells where to index
into the marginal variable. */
Var12_Marg = marginal[1:(locmar[2]-1)];
Var12_Marg = shape(Var12_Marg,dim[2],dim[1]);
Var3_Marg = marginal[locMar[2]:ncol(marginal)];
NewTable = j(nrow(table),ncol(table),0);
/* give as much to cell (1,1,1) as possible,
then as much as remains to cell (1,1,2), etc,
until all the margins have been distributed. */
idx = 1;
do i3 = 1 to dim[3]; /* over Var3 */
do i2 = 1 to dim[2]; /* over Var2 */
do i1 = 1 to dim[1]; /* over Var1 */
/* Note Var12_Marg has Var1 varying across
the columns */
t = min(Var12_Marg[i2,i1],Var3_Marg[i3]);
NewTable[idx] = t;
idx = idx + 1;
Var12_Marg[i2,i1] = Var12_Marg[i2,i1]-t;
Var3_Marg[i3] = Var3_Marg[i3]-t;
end;
end;
end;
call marg(locmar, NewMarginal, dim, table, config);
maxDiff = abs(marginal-NewMarginal)[<>];
if maxDiff=0 then
print "Marginals are unchanged";
print NewTable;
LOCMAR
1 11
MARGINAL
COL1 COL2 COL3 COL4 COL5 COL6 COL7
ROW1 1051 723 276 361 203 474 361
MARGINAL
COL8 COL9 COL10 COL11 COL12 COL13
ROW1 105 131 64 1707 561 1481
TABLE
575 388 100 77 51
267 153 40 37 19
117 102 67 87 62
48 35 18 12 13
359 233 109 197 90
159 173 47 82 32
Marginals are unchanged
NEWTABLE
1051 656 0 0 0
0 0 0 0 0
0 67 276 218 0
0 0 0 0 0
0 0 0 143 203
474 361 105 131 64
| Crabmeat: | Y E S | N O | |||
| Potato salad: | Yes | No | Yes | No | |
| Ill | 120 | 4 | 22 | 0 | |
| Not Ill | 80 | 31 | 24 | 23 | |
/* Compute a chi-square score for a table of observed
values, given a table of expected values. Compare
this score to a chi-square value with given degrees
of freedom at 95% confidence level. */
start ChiSqTest( obs, model, degFreedom );
diff = (obs - model)##2 / model;
chiSq = diff[+];
chiSqCutoff = cinv(0.95, degFreedom);
print chiSq chiSqCutoff;
if chiSq > chiSqCutoff then
print "Reject hypothesis";
else
print "No evidence to reject hypothesis";
finish;
dim={2 2 2};
/* Crab meat: Y E S N O
Potato: Yes No Yes No */
table={ 120 4 22 0, /* Ill */
80 31 24 23 }; /* Not Ill */
crabmeat = " C R A B N O C R A B";
potato = {'YesPot' 'NoPot' 'YesPot' 'NoPot'};
illness = {'Ill', 'Not Ill'};
hypoth = "Hypothesis: no three-factor interaction";
config={1 1 2,
2 3 3};
call ipf(fit,status,dim,table,config);
print hypoth, "Fitted Model:",
fit[label=crabmeat colname=potato
rowname=illness format=6.2];
run ChiSqTest(table, fit, 1); /* 1 deg of freedom */
/* Test for interaction between Var 3 (Illness) and
Var 1 (Potato Salad) */
hypoth = "Hypothesis: no Illness-Potato Interaction";
config={1 2,
2 3};
call ipf(fit,status,dim,table,config);
print hypoth, "Fitted Model:",
fit[label=crabmeat colname=potato
rowname=illness format=6.2];
run ChiSqTest(table, fit, 2); /* 2 deg of freedom */
/* Test for interaction between Var 3 (Illness) and
Var 2 (Crab meat) */
hypoth = "Hypothesis: no Illness-Crab Interaction";
config={1 1,
2 3};
call ipf(fit,status,dim,table,config);
print hypoth, "Fitted Model:",
fit[label=crabmeat colname=potato
rowname=illness format=6.2];
run ChiSqTest(table, fit, 2); /* 2 deg of freedom */
HYPOTH
Hypothesis: no three-factor interaction
Fitted Model:
C R A B N O C R A B
YesPot NoPot YesPot NoPot
Ill 121.08 2.92 20.92 1.08
Not Ill 78.92 32.08 25.07 21.93
CHISQ CHISQCUTOFF
1.7021335 3.8414588
No evidence to reject hypothesis
HYPOTH
Hypothesis: no interaction between Illness and Potatoes
Fitted Model:
C R A B N O C R A B
YesPot NoPot YesPot NoPot
Ill 105.53 18.47 14.67 7.33
Not Ill 94.47 16.53 31.33 15.67
CHISQ CHISQCUTOFF
44.344643 5.9914645
Reject hypothesis
HYPOTH
Hypothesis: no interaction between Illness and Crab
Fitted Model:
C R A B N O C R A B
YesPot NoPot YesPot NoPot
Ill 115.45 2.41 26.55 1.59
Not Ill 84.55 32.59 19.45 21.41
CHISQ CHISQCUTOFF
5.0945132 5.9914645
No evidence to reject hypothesis
| Model | Config | Degrees of Freedom |
| No three-factor | {1 1 2, | |
| 2 3 3} | ||
| One two-factor absent | {1 2, | |
| 3 3} | ||
| {1 2, | ||
| 2 3} | ||
| {1 1, | ||
| 2 3} | ||
| Two two-factor absent | {2, 3} | |
| {1, 3} | ||
| {1, 2} | ||
| No two-factor | {1 2 3} | |
| Saturated | {1,2,3} |
dim={5 2 3};
table={
/* Father's Education:
<12 12 <16 16 >16
Self-
Relig Esteem */
/* Cath- Hi */ 575 388 100 77 51,
/* olic Lo */ 267 153 40 37 19,
/* Jew- Hi */ 117 102 67 87 62,
/* ish Lo */ 48 35 18 12 13,
/* Prote- Hi */ 359 233 109 197 90,
/* stant Lo */ 159 173 47 82 32
};
table={
/* Esteem: H I G H L O W */
/* <12 ... >16 <12 ... >16 */
575 ... 51 267 ... 19, /* Catholic */
117 ... 62 48 ... 13, /* Jewish */
359 ... 90 159 ... 32 /* Protestant*/
};
table={
/* CATHOLIC ... PROTESTANT
High Low ... High Low
<12 ... >16 <12 ... >16 ... <12 ... >16 <12 ... >16
*/
575 ... 51 267 ... 19 ... 359 ... 90 159 ... 32
};
Copyright © 2009 by SAS Institute Inc., Cary, NC, USA. All rights reserved.