Resources

Johnson System of Distributions-Macro

 /****************************************************************/
 /*          S A S   S A M P L E   L I B R A R Y                 */
 /*                                                              */
 /*    NAME: JOHNSYS4                                            */
 /*   TITLE: Johnson System of Distributions-Macro               */
 /* PRODUCT: QC                                                  */
 /*  SYSTEM: ALL                                                 */
 /*    KEYS: Capability Analysis, Johnson System,                */
 /*   PROCS: IML CAPABILITY                                      */
 /*    DATA:                                                     */
 /*                                                              */
 /*   NOTES:                                                     */
 /*    MISC: Macro JOHNSYS takes as input the data set (stored   */
 /*          as macro variable DATA). The proper Johnson system  */
 /*          is selected using the ratio given in Slifker and    */
 /*          Shenton (1980).  The parameter estimates for the    */
 /*          appropriate system are then computed and displayed  */
 /*          along with the johnson curve on a histogram.        */
 /*                                                              */
 /*     REF: Slifker, J. F. and Shapiro, S. S. (1980), "The      */
 /*          Johnson System: Selection and Parameter Estimation",*/
 /*          Technometrics, Vol. 22, No. 2, pg. 239-246.         */
 /*                                                              */
 /*          Bowman, K. O. and Shenton, L. R. (1983), "Johnson's */
 /*          System of Distributions", Encyclopedia of           */
 /*          Statistical Sciences, Vol. 4, pg. 303-314.          */
 /*          John Wiley & Sons, Inc.                             */
 /*                                                              */
 /****************************************************************/
options  ps=60 ls=80;

title1 'Johnson System of Distributions';

 /*---------------------------------------------------------*/
 /*                                                         */
 /*    Generate 150 observations from Johnson Su System     */
 /*                                                         */
 /*---------------------------------------------------------*/

data sudata;
   lambda  = 2.0;
   eta     = 1.5;
   gamma   = 0.5;
   epsilon = 4.0 ;
   do i = 1 to 150;
      z = rannor(9931131);
      a = exp( (z-gamma)/eta);
      x = lambda*(a**2 -1)/(2*a) + epsilon;
      output;
      end;
   keep x;
run;

 /*---------------------------------------------------------*/
 /*                                                         */
 /*    Generate 150 observations from Johnson Sb System     */
 /*                                                         */
 /*---------------------------------------------------------*/

data sbdata;
   lambda  = 1;
   eta     = 1;
   gamma   = 1;
   epsilon = 2;
   do i = 1 to 150;
      z = rannor(993131);
      a = exp( (z-gamma)/eta);
      x = lambda / ( 1 + ( 1/a ) ) + epsilon;
      output;
      end;
   keep x;
run;

 /*---------------------------------------------------------*/
 /*                                                         */
 /*    Generate 150 observations from Johnson Sl System     */
 /*                                                         */
 /*---------------------------------------------------------*/

data sldata;
   lambda  = 1;
   eta     = 2;
   gamma   = 2;
   epsilon = 3;
   do i = 1 to 150;
      z = rannor(993131);
      a = exp( (z-gamma)/eta);
      x = lambda*a + epsilon;
      output;
      end;
   keep x;
run;

 /*------------------------------*/
 /*                              */
 /* The JOHNSYS macro definition */
 /*                              */
 /*------------------------------*/

%macro johnsys(data);

proc iml;

sort &data out=sorted by x;
use sorted;

read all var {x} into x;

nobs=nrow(x);

 /*---  Choose z-value for percentile fit ---*/

zval  = .524;

p3    = probnorm(3*zval);
p1    = probnorm(zval);
pm1   = probnorm(-1*zval);
pm3   = probnorm(-3*zval);

i4    = p3*nobs  + .5;
i3    = p1*nobs  + .5;
i2    = pm1*nobs + .5;
i1    = pm3*nobs + .5;

x3z   = x[int(i4)] + mod(i4,1)*(x[int(i4)+1] - x[int(i4)]);
x1z   = x[int(i3)] + mod(i3,1)*(x[int(i3)+1] - x[int(i3)]);
xm1z  = x[int(i2)] + mod(i2,1)*(x[int(i2)+1] - x[int(i2)]);
xm3z  = x[int(i1)] + mod(i1,1)*(x[int(i1)+1] - x[int(i1)]);

m     = x3z  - x1z;
n     = xm1z - xm3z;
p     = x1z  - xm1z;

 /*--- Ratio used to choose proper Johnson system ---*/

ratio = m*n/p**2;

tol   = .05;

 /*---Select appropriate Johnson Family & Estimate Parameters---*/

if (ratio > 1.0 + tol) then do;

 /*--- Johnson Su Parameter Estimates ---*/

   temp    = .5*(m/p + n/p);
   eta     = 2*zval/(log(temp + sqrt(temp*temp -1 )));

   temp    = (n/p - m/p) / ( 2*(sqrt(m*n/(p*p) - 1)) );
   gamma   = eta*log(temp + sqrt(temp*temp + 1));

   lambda  = 2*p*sqrt(m*n/(p*p)-1)/((m/p+n/p- 2)*sqrt(m/p+n/p+2));
   epsilon = (x1z + xm1z)/2 + p*(n/p - m/p) / ( 2*(m/p+n/p-2));

   create parms var{eta gamma lambda epsilon};
   append;
   close parms;
   type = '1';
   end;

else if (ratio < 1.0 - tol ) then do;

 /*--- Johnson Sb Parameter Estimates ---*/

   temp    = .5* sqrt( (1 + p/m)*(1 + p/n) );
   eta     = zval / log( temp + sqrt(temp*temp - 1) );

   temp    = (p/n-p/m)*sqrt((1+p/m)*(1+p/n)- 4)/(2*(p*p/(m*n)-1));
   gamma   = eta*log(temp + sqrt(temp*temp + 1) );

   lambda  = p*sqrt(( (1 + p/m)*(1+p/n)-2)**2 - 4)/(p*p/(m*n)-1);
   epsilon = (x1z + xm1z)/2-lambda/2+p*(p/n-p/m)/(2*(p*p/(m*n)-1));

   create parms var{eta gamma lambda epsilon};
   append;
   close parms;
   type = '2';

   end;

else do;

   /* Johnson Sl Parameter Estimates */

   eta     = 2*zval / log(m/p);
   gamma   = eta*log( (m/p - 1) / (p*sqrt(m/p)) );
   epsilon = (x1z + xm1z)/2 - (p/2)* (m/p + 1) / (m/p - 1);

   /* create dataset containing parameters */
   create parms var{gamma eta epsilon};
   append;
   close parms;
   type = '3';

   end;

call symput('type',type);

quit;

 /*---------------------------------------*/
 /*                                       */
 /* Use PROC CAPABILITY to save width of  */
 /* histogram bars in OUTFIT= dataset.    */
 /*                                       */
 /*---------------------------------------*/

proc capability data=&data noprint;
   hist x / outfit = out1 normal(noprint) noplot;
   run;

 /*------------------------------------------------------------*/
 /*                                                            */
 /* Create annotate dataset that will superimpose the johnson  */
 /* curve and a table containing the parameter estimates onto  */
 /* a histogram.                                               */
 /*                                                            */
 /*------------------------------------------------------------*/

data anno;
   merge parms out1;
   length function style color cepsilon clambda cgamma ceta $ 8
          text $ 20;
   function = 'point';
   color    = 'yellow';
   size     = 2;
   xsys     = '2';
   ysys     = '2';
   when     = 'a';
   pi       = 3.14159;

   /*--- Determine constant based on Johnson family ---*/

   %if (&type = 1) %then %do;
      constant = (100*_width_/lambda)*(eta/sqrt(2*pi));
      %end;

   %else %if (&type = 2) %then %do;
      constant = (100*_width_/lambda)*(eta/sqrt(2*pi));
      %end;

   %else %do;
      constant = (100*_width_*eta/sqrt(2*pi));
      %end;

   start    = _midpt1_ - .5*_width_;
   finish   = _midptn_ + .5*_width_;

   /*--- Draw Johnson Curve ---*/

   do x=start to finish by .01;

      %if (&type = 1) %then %do;
         a        = (x-epsilon)/lambda;
         invsinh  = log(a + sqrt(a**2 + 1));
         y        = constant*
                    (1/sqrt(1+a**2))*
                    exp(-.5*(gamma+eta*invsinh)**2);
         %end;

      %else %if (&type = 2) %then %do;
         if (x > epsilon) then do;
            a     = (x-epsilon)/lambda;
            fy    = log(a / (1 - a ));
            y     = constant*
                    (1/(a*(1-a)))*
                    exp(-.5*(gamma + eta*fy)**2);
            end;
         else y = 0;
         %end;

      %else %do;
         if (x > epsilon) then do;
            a     = (x-epsilon);
            y     = constant*
                    (1/a)*
                    exp(-.5*(gamma + eta*log(a))**2);
            end;
         else y = 0;
         %end;

      output;
      function ='draw';
      end;

   %if (&type= 3) %then %do;
      lambda = 0;
      %end;

   /*--- Format Parameters with 3 Decimal Places ---*/

   cepsilon = put(epsilon,6.3);
   cgamma   = put(gamma,  6.3); ceta = put(eta,   6.3);

   %if (&type = 1) or (&type = 2) %then %do;
      clambda = put(lambda,6.3);
      %end;

   /*--- Draw Table containing parameter estimates ---*/

   function = 'move';     xsys = '1';
                          ysys = '1';
                          x = 100; y = 100;        output;
                          xsys = 'A';
                          ysys = 'A';
                          x = -3;  y = -3;         output;
   function = 'cntl2txt';                          output;
   function = 'push';                              output;
   function = 'label';    hsys     = 'A';
                          size     =  1;
                          position = 'a';
                          style    = 'none';
                          color    = 'white';
                          x = 0; y = 0;
   %if (&type= 1) %then %do;
                          text = 'Johnson (Su)';   output;
   %end;
   %else %if (&type= 2) %then %do;
                          text = 'Johnson (Sb)';   output;
   %end;
   %else %do;
                          text = 'Johnson (Sl)';   output;
   %end;

   function = 'move';     x = -13;   y = .5;       output;
   function = 'draw';     color = 'yellow';
                          size  =  2;
                          x = -4; y = 0;           output;

   function = 'cntl2txt';                          output;

   function = 'label';    size     =  1;
                          color    = 'white';
                          position = 'c';
               x = 0;  y = -3.5; text = 'Epsilon'; output;
               x = -7; y = -2;   text = 'Gamma';   output;
               x = -5; y = -2;   text = 'Eta';     output;

   %if (&type = 1) or (&type = 2) %then %do;
               x = -3; y = -2;  text = 'Lambda';   output;
   %end;

   function = 'pop';                               output;
   function = 'label'; position = 'a';  x = 0;
                       y = -3; text = cepsilon;    output;
                       y = -2; text = cgamma;      output;
                       y = -2; text = ceta;        output;
   %if (&type = 1) or (&type = 2) %then %do;
                       y = -2; text = clambda;     output;
   %end;

   %if (&type = 3) %then %do;
      %let y = 10;
      %end;
   %else %do;
      %let y = 12;
      %end;

   function = 'move';     x = -18; y = 2;          output;
   function = 'poly';     style = 'solid';
                          color = 'blue';
                          x = 0;   y = 0;          output;
   function = 'polycont'; x = 19;  y = 0;          output;
                          x = 19;  y = -&y;        output;
                          x = 0;   y = -&y;        output;
                          x = 0;   y = 0;          output;

   function = 'move';     x = 0;   y = -3;         output;
   function = 'draw';     color = 'white';
                          x = 19;  y = 0;          output;

   function = 'move';     xsys = '1';
                          ysys = '1';
                          x = 100; y = 100;        output;
                          xsys = 'A';
                          ysys = 'A';
                          x = -3;  y = -3;         output;

   function = 'move';     x = -13;   y = .5;       output;
   function = 'draw';     color = 'yellow';
                          size  =  2;
                          x = -4; y = 0;           output;

   keep function xsys  ysys     hsys  x     y
        text     size  position style color when;

   run;


 /*--------------------------------------------------------------*/
 /*                                                              */
 /* Display histogram with Johnson curve and parameter estimates */
 /*                                                              */
 /*--------------------------------------------------------------*/

pattern1 c=red v=s;

proc capability data=&data annotate=anno noprint;
   hist x / cframe = gray;
run;

%mend;

 /*---------------------------------------------------------*/
 /*                                                         */
 /* Invoke JOHNSYS macro with Sb data, Su data and Sl data. */
 /*                                                         */
 /*---------------------------------------------------------*/

options nogstyle;

%johnsys(sbdata);

%johnsys(sudata);

%johnsys(sldata);

options gstyle;

goptions reset=global;