FOCUS AREAS

SAS/STAT Examples

Estimating the Variance of a Variable in a Finite Population


Contents | SAS Program | PDF

data IceCreamStudy;
   input Grade StudyGroup Spending Weight @@;
   datalines; 
7  34  7 76.0   7  34  7 76.0  7 412  4 76.0  9  27 14 80.6 
7  34  2 76.0   9 230 15 80.6  9  27 15 80.6  7 501  2 76.0
9 230  8 80.6   9 230  7 80.6  7 501  3 76.0  8  59 20 84.0
7 403  4 76.0   7 403 11 76.0  8  59 13 84.0  8  59 17 84.0
8 143 12 84.0   8 143 16 84.0  8  59 18 84.0  9 235  9 80.6
8 143 10 84.0   9 312  8 80.6  9 235  6 80.6  9 235 11 80.6
9 312 10 80.6   7 321  6 76.0  8 156 19 84.0  8 156 14 84.0
7 321  3 76.0   7 321 12 76.0  7 489  2 76.0  7 489  9 76.0
7  78  1 76.0   7  78 10 76.0  7 489  2 76.0  7 156  1 76.0
7  78  6 76.0   7 412  6 76.0  7 156  2 76.0  9 301  8 80.6
;
data StudyGroups;
   input Grade _total_; 
   datalines;
7 608
8 252
9 403
;
proc surveymeans data=IceCreamStudy mean stacking ;
   weight Weight;
   strata Grade;
   cluster StudyGroup;      
   var Spending;
   ods output Statistics = Statistics 
              Summary = Summary;
run;  
data _null_;
   set Statistics;
   call symput("Spending_Mean",Spending_Mean);
run;
data Summary;
   set Summary;
   if Label1="Sum of Weights" then call symput("N",cValue1);
   if Label1="Number of Strata" then call symput("H",cValue1);
   if Label1="Number of Clusters" then call symput("C",cValue1);
run;

data Working;
   set IceCreamStudy;
   z=(1/(&N-1))*(Spending-&Spending_Mean)**2;
run;
proc surveymeans data = Working sum stacking;
   weight Weight;
   var z;
   ods output Statistics = Result(keep=z_Sum);
run;   
data _null_;
   set Result;
   call symput("Variance",z_Sum);
run;
data Taylor;
   set IceCreamStudy;
   u=(1/(&N-1))*((Spending-&Spending_Mean)**2 - &Variance);
run;
proc surveymeans data = Taylor sum varsum stacking total=StudyGroups;
   strata Grade;
   cluster StudyGroup;
   weight Weight;
   var u;
   ods output Statistics = TaylorResult;
run;          
%let df=%eval(&C - &H);

data TaylorResult;
   set TaylorResult(rename=(u_VarSum=Variance
                      u_StdDev=StdErr));
   Estimate=&Variance;
   LowerCL= Estimate + StdErr*TINV(.025,&df);
   UpperCL= Estimate + StdErr*TINV(.975,&df);
   label Estimate=Population Variance Estimate
         Variance=Variance of Estimate
         StdErr=Standard Error of Estimate
         LowerCL=Lower Confidence Limit      
         UpperCL=Upper Confidence Limit;
   Variable='Spending';
run;
title 'Parameter Estimates';

proc print data=TaylorResult label noobs;
   var Variable Estimate Variance StdErr LowerCL UpperCL;
run;

title ;
proc surveymeans data=IceCreamStudy mean stacking ;
   weight Weight;
   strata Grade;
   cluster StudyGroup;      
   var Spending;
   ods output Statistics = Statistics 
              Summary = Summary;
run;  
data _null_;
   set Statistics;
   call symput("Spending_Mean",Spending_Mean);
run;
data Summary;
   set Summary;
   if Label1="Sum of Weights" then call symput("N",cValue1);
   if Label1="Number of Strata" then call symput("H",cValue1);
run;

data Working;
   set IceCreamStudy;
   z=(1/(&N-1))*(Spending-&Spending_Mean)**2;
run;
proc surveymeans data=Working sum stacking 
                 varmethod=JACKKNIFE(outjkcoefs=Jkcoefs 
                                     outweights=Jkweights);
   strata Grade;
   cluster StudyGroup;
   weight Weight;
   var z;
   ods output Statistics = Result
              VarianceEstimation=VarianceEstimation;
run;
data _null_;
   set Result;
   call symput("Variance",z_Sum);
run;
data _null_;
   set VarianceEstimation;
   where label1="Number of Replicates";
   call symput("R",cvalue1);
run;

%let R=%eval(&R);
data Long(drop= RepWt_1 - RepWt_&R Z);
  set Jkweights;
  array num (*) RepWt_1 - RepWt_&R;
  do replicate=1 to dim(num);
    Jkweight=num(replicate);
   output;
  end;
run;
proc sort data=Long out=Long;
   by Replicate;
run;
proc surveymeans data=Long mean;
   weight Jkweight;
   var Spending;
   by Replicate;
   ods output Statistics = JKMeans(keep=Replicate Mean)
              Summary = JKN;
run;  
proc sort data=JKMeans out=JKMeans;
   by Replicate;
run;
data JKN(keep=N replicate );
   set JKN(rename=(nvalue1=N));
   where Label1="Sum of Weights";
run;
proc sort data=JKN out=JKN;
   by Replicate;
run;
data Long;
   merge Long JKN JKMeans;
   by Replicate;
run;
data Long;
   set Long;
   z=(1/(N-1))*(Spending-Mean)**2;
run;
proc surveymeans data=Long sum stacking;
   weight Jkweight;
   var z;
   by Replicate;
   ods output Statistics=Statistics(drop=Z_StdDEV rename=(Z_Sum=JKEstimate));
run;   
proc sort data=Statistics out=Statistics;
   by Replicate;
run;
proc sort data=Jkcoefs out=Jkcoefs;
   by Replicate;
run;
data Statistics;
   merge Statistics Jkcoefs;
   by Replicate;
run;
data Statistics;
   set Statistics;
   u=JKcoefficient*(JKEstimate-&Variance)**2;
run;
proc surveymeans data=Statistics sum;
   var u;
   ods output Statistics=JKResult(rename=(sum=Variance));
run;
%let df=%eval(&R-&H);
   
data JKResult;
   set JKResult;
   StdErr=sqrt(Variance);
   Estimate=&Variance;
   LowerCL= Estimate + StdErr*TINV(.025,&df);
   UpperCL= Estimate + StdErr*TINV(.975,&df);   
   label Estimate=Population Variance Estimate
         Variance=Variance of Estimate
         StdErr=Standard Error of Estimate
         LowerCL=Lower Confidence Limit      
         UpperCL=Upper Confidence Limit;
   Variable='Spending';
run;
title 'Parameter Estimates';

proc print data=JKResult label noobs;
   var Variable Estimate Variance StdErr LowerCL UpperCL;
run;

title ;
proc format; 
   value $line 
      F='F-Market & Wharves' 
      J='J-Church'
      K='K-Ingleside'
      L='L-Taraval'
      M='M-Ocean View' 
      N='N-Judah';
run;
data p;
   input p @@ ;
   weight=int(120/12+120/9+420/10+120/9+360/15)/2;
   datalines;
0.06 0.053 0.04 0.05 0.10 0.09 0.13 0.12 0.02 0.03 0.04
0.05 0.05 0.055 0.01 0.04 0.05 0.001 0.004 0.005 0.002
;

data f1;
   line='F'; vehicle=1;
   do passenger=1 to 65;
      waittime=rantbl(200,0.06,0.053,0.04,0.05,0.10,0.09, 0.13,
                      0.12,0.02,0.03,0.04,0.05,0.05,0.055,0.01,
                      0.04,0.05,0.001,0.004,0.005,0.002)-1;
      output;
   end;
run;

data f2; 
   line='F'; vehicle=2;
   do passenger=1 to 102;
      waittime=rantbl(103,0.06,0.053,0.04,0.05,0.10,0.09,0.13,
                      0.12,0.02,0.03,0.04,0.05,0.05,0.055,0.01,
                      0.04,0.05,0.001,0.004,0.005,0.002)-1;
      output;
   end;
run;

data f; 
   set f1 f2; 
   weight=int(70/15+120/6+420/8+120/7+360/15)/2;
run;

data j1; 
   line='J'; vehicle=1;
      do passenger=1 to 101;
         waittime=rantbl(2,0.06,0.003,0.04,0.05,0.10,0.09,0.13,
                         0.12,0.12,0.03,0.04,0.05,0.05,0.055,0.03,
                         0.04,0.05,0.001,0.004,0.025,0.002)-1;
         output;
      end;
run;

data j2; 
   line='J'; vehicle=2;
   do passenger=1 to 142;
      waittime=rantbl(7,0.06,0.053,0.04,0.09,0.13,0.05,0.10,0.12,
                      0.02,0.03,0.04,0.05,0.05,0.004,0.005,0.002,
                      0.055,0.01,0.04,0.05,0.001)-1;
      output;
   end;
run;

data j; 
   set j1 j2; 
   weight=int(120/15+120/9+420/10+120/9+360/15)/2;
run;

data k1; 
   line='K'; vehicle=1;
   do passenger=1 to 145;
      waittime=rantbl(111,0.06,0.003,0.04,0.05,0.10,0.09,0.13,0.12,
                      0.12,0.03,0.04,0.05,0.05,0.055,0.03,0.04,0.05,
                      0.001,0.004,0.025,0.002)-1;
      output;
   end;
run;

data k2; 
   line='K'; vehicle=2;
   do passenger=1 to 180;
      waittime=rantbl(71,0.06,0.053,0.04,0.09,0.13,0.05,0.10,0.12,
                      0.02,0.03,0.04,0.05,0.05,0.004,0.005,0.002,
                      0.055,0.01,0.04,0.05,0.001)-1;
      output;
   end;
run;

data k; 
   set k1 k2; 
   weight=int(120/15+120/9+420/10+120/9+360/15)/2;
run;

data L1; 
   line='L'; vehicle=1;
   do passenger=1 to 135;
      waittime=rantbl(1110,0.06,0.003,0.05,0.05,0.04,0.05,0.10,0.09,
                      0.13,0.12,0.12,0.03,0.04,0.055,0.03,0.04,0.05,
                      0.001,0.004,0.025,0.002)-1;
      output;
   end;
run;

data L2; 
   line='L'; vehicle=2;
   do passenger=1 to 185;
      waittime=rantbl(18,0.02,0.03,0.04,0.055,0.09,0.053,0.04,0.09,
                      0.13,0.05,0.10,0.12,0.04,0.05,0.05,0.004,0.005,
                      0.002,0.025,0.01,0.001)-1;
      output;
   end;
run;

data l; set L1 L2; 
   weight=int(120/8+120/10+420/8+120/9+360/15+300/30)/2;
run;

data m1; 
   line='M'; vehicle=1;
   do passenger=1 to 139;
      waittime=rantbl(1150,0.06,0.03,0.05,0.05,0.14,0.05,0.10,0.09,0.03,
                      0.12,0.12,0.03,0.04,0.015,0.03,0.02,0.05,0.001,
                      0.004,0.025,0.002)-1;
      output;
   end;
run;

data m2; 
   line='M'; vehicle=2;
   do passenger=1 to 203;
      waittime=rantbl(1008,0.03,0.03,0.05,0.055,0.29,0.053,0.04,0.09,
                      0.13,0.05,0.10,0.12,0.04,0.05,0.02,0.004,0.005,
                      0.002,0.015,0.01,0.001)-1;
      output;
   end;
run;

data m; 
   set m1 m2; 
   weight=int(70/15+120/9+420/10+120/9+360/15)/2;
run;

data n1; 
   line='N'; vehicle=1;
   do passenger=1 to 306;
      waittime=rantbl(1150,0.06,0.04,0.06,0.05,0.14,0.05,0.08,0.09,
                      0.03,0.12,0.12,0.03,0.04,0.015,0.03,0.02,0.05,
                      0.001,0.004,0.025,0.002)-1;
      output;
   end;
run;

data n2; 
   line='N'; vehicle=2;
   do passenger=1 to 234;
      waittime=rantbl(1008,0.03,0.05,0.05,0.05,0.07,0.053,0.04,0.03,
                      0.23,0.05,0.10,0.08,0.04,0.05,0.02,0.004,0.005,
                      0.012,0.015,0.02,0.001)-1;
      output;
   end;
run;

data n; 
   set n1 n2; 
   weight=int(120/12+120/7+420/10+120/7+360/12+300/30);
run;

data MUNIsurvey; 
   set f j k l m n; 
   format line $line.; 
run;
proc datasets nolist;
   delete f j k l m n;
run;
proc surveymeans data=MUNIsurvey mean stacking ;
   weight Weight;
   strata Line;
   cluster Vehicle;
   var Waittime;
   ods output Statistics = Statistics 
              Summary = Summary;
run;   
data _null_;
   set Statistics;
   call symput("Waittime_Mean",Waittime_Mean);
run;
data Summary;
   set Summary;
   if Label1="Sum of Weights" then call symput("N",cValue1);
   if Label1="Number of Strata" then call symput("H",cValue1);
run;

data Working;
   set MUNIsurvey;
   Z=(1/(&N-1))*(Waittime-&Waittime_Mean)**2;
run;
proc surveymeans data=Working sum stacking 
                 varmethod=brr(outweights=BRRweights);
   strata Line;
   cluster Vehicle;
   weight Weight;
   var z;
   ods output Statistics = Estimate
              VarianceEstimation=VarianceEstimation;
run;
data _null_;
   set Estimate;
   call symput("Variance",Z_Sum);
run;
data _null_;
   set VarianceEstimation;
   where label1="Number of Replicates";
   call symput("R",cvalue1);
run;

%let R=%eval(&R);
data Long(drop= RepWt_1 - RepWt_&R Z);
  set BRRweights;
  array num (*) RepWt_1 - RepWt_&R;
  do replicate=1 to dim(num);
    BRRweight=num(replicate);
   output;
  end;
run;
proc sort data=Long out=Long;
   by Replicate;
run;
proc surveymeans data=Long mean;
   weight BRRweight;
   var Waittime;
   by Replicate;
   ods output Statistics = BRRMeans(keep=Replicate Mean)
              Summary = BRRN;
run;  
proc sort data=BRRMeans out=BRRMeans;
   by Replicate;
run;
data BRRN(keep=N replicate );
   set BRRN(rename=(nvalue1=N));
   where Label1="Sum of Weights";
run;
proc sort data=BRRN out=BRRN;
   by Replicate;
run;
data Long;
   merge Long BRRN BRRMeans;
   by Replicate;
run;
data Long;
   set Long;
   z=(1/(N-1))*(Waittime-Mean)**2;
run;
proc surveymeans data=Long sum stacking;
   weight BRRweight;
   var z;
   by Replicate;
   ods output Statistics=Statistics(drop=Z_StdDEV 
                                    rename=(Z_Sum=BRREstimate));
run;   
data Statistics;
   set Statistics;
   u=(1/&R)*(BRREstimate-&Variance)**2;
run;
proc surveymeans data=Statistics sum;
   var u;
   ods output Statistics=BRRResult(rename=(sum=Variance));
run;
data BRRResult;
   set BRRResult;
   StdErr=sqrt(Variance);
   Estimate=&Variance;
   LowerCL= Estimate + StdErr*TINV(.025,&H);
   UpperCL= Estimate + StdErr*TINV(.975,&H);  
   Variable='Waittime';
   label Estimate=Population Variance Estimate
         Variance=Variance of Estimate
         StdErr=Standard Error of Estimate
         LowerCL=Lower Confidence Limit      
         UpperCL=Upper Confidence Limit;   
run;
title 'Parameter Estimates';

proc print data=BRRResult label noobs;
   var Variable Estimate Variance StdErr LowerCL UpperCL;
run;

title ;
data BRRStdDev;
   set BRRResult;
   Variance=(1/(4*Estimate))*Variance;
   Estimate=sqrt(Estimate);
   StdErr=sqrt(Variance);
   LowerCL= Estimate + StdErr*TINV(.025,&H);
   UpperCL= Estimate + StdErr*TINV(.975,&H);  
   label Estimate=Population Standard Deviation Estimate;   
run;
title 'Parameter Estimates';

proc print data=BRRStdDev label noobs;
   var Variable Estimate Variance StdErr LowerCL UpperCL;
run;

title ;