/* Size, Shape, and Correlation of Grocery Boxes */
/****************************************************************/
/* S A S   S A M P L E   L I B R A R Y */
/* */
/* NAME: CLUSEX6 */
/* TITLE: Size, Shape, and Correlation of Grocery Boxes */
/* PRODUCT: STAT */
/* SYSTEM: ALL */
/* KEYS: CLUSTER ANALYSIS SIZE SHAPE CORRELATION */
/* PROCS: CLUSTER, STANDARD, TREE, SORT, PRINT, FREQ, FORMAT */
/* DATA: */
/* */
/* SUPPORT: sasrbk */
/* REF: PROC CLUSTER */
/* MISC: */
/****************************************************************/
title 'Cluster Analysis of Grocery Boxes';

/*------------------------------------------------------------------*/
/* GROCERY2: one observation per grocery box.                       */
/*                                                                  */
/* The in-stream data mixes two kinds of records:                   */
/*   ---Category---   sets CLASS for the boxes that follow          */
/*   name  h w d wt unit color   one box                            */
/*                                                                  */
/* NAME is read with the & list-input modifier, so it may contain   */
/* single embedded blanks and MUST be followed by at least two      */
/* blanks before the first measurement.                             */
/*------------------------------------------------------------------*/
data grocery2;
   length name   $35   /* name of product */
          class  $20   /* category of product; wide enough for the
                          longest category, 'Breakfast cereals' */
          unit   $1    /* unit of measurement for weights:
                             g=gram
                             o=ounce
                             l=lb
                          all weights are converted to grams */
          color  $8    /* predominant color of box */
          height 8     /* height of box in cm. */
          width  8     /* width of box in cm. */
          depth  8     /* depth of box (front to back) in cm. */
          weight 8     /* weight of box in grams */
          c_white c_yellow c_red c_green c_blue 4;
                       /* dummy variables */
   retain class;       /* category carries forward to following boxes */
   drop unit;          /* not needed once weight is in grams */

   /*--- read name with possible embedded blanks and ---*/
   /*--- hold the record for the second INPUT        ---*/
   input name & @;

   /*--- if name starts with "---",   ---*/
   /*--- it's really a category value ---*/
   if substr(name,1,3) = '---' then do;
      /* text between the leading '---' and the next '-' */
      class = substr(name,4,index(substr(name,4),'-')-1);
      /* DELETE ends this iteration, so no box is output for the header */
      delete;
   end;

   /*--- read the rest of the variables ---*/
   input height width depth weight unit color;

   /*--- convert weights to grams ---*/
   select (unit);
      when ('l') weight = weight * 454;
      when ('o') weight = weight * 28.3;
      when ('g') ;
      otherwise put 'Invalid unit ' unit;
   end;

   /*--- use 0/1 coding for dummy variables for colors ---*/
   c_white  = (color = 'w');
   c_yellow = (color = 'y');
   c_red    = (color = 'r');
   c_green  = (color = 'g');
   c_blue   = (color = 'b');
datalines;
---Breakfast cereals---
Cheerios  32.5 22.4 8.4 567 g y
Cheerios  30.3 20.4 7.2 425 g y
Cheerios  27.5 19 6.2 283 g y
Cheerios  24.1 17.2 5.3 198 g y
Special K  30.1 20.5 8.5 18 o w
Special K  29.6 19.2 6.7 12 o w
Special K  23.4 16.6 5.7 7 o w
Corn Flakes  33.7 25.4 8 24 o w
Corn Flakes  30.2 20.6 8.4 18 o w
Corn Flakes  30 19.1 6.6 12 o w
Grape Nuts  21.7 16.3 4.9 680 g w
Shredded Wheat  19.7 19.9 7.5 283 g y
Shredded Wheat, Spoon Size  26.6 19.6 5.6 510 g r
All-Bran  21.1 14.3 5.2 13.8 o y
Froot Loops  30.2 20.8 8.5 19.7 o r
Froot Loops  25 17.7 6.4 11 o r
---Crackers---
Wheatsworth  11.1 25.2 5.5 326 g w
Ritz  23.1 16 5.3 340 g r
Ritz  23.1 20.7 5.2 454 g r
Premium Saltines  11 25 10.7 454 g w
Waverly Wafers  14.4 22.5 6.2 454 g g
---Detergent---
Arm & Hammer Detergent  38.8 30 16.9 25 l y
Arm & Hammer Detergent  39.5 25.8 11 14.2 l y
Arm & Hammer Detergent  33.7 22.8 7 7 l y
Arm & Hammer Detergent  27.8 19.4 6.3 4 l y
Tide  39.4 24.8 11.3 9.2 l r
Tide  32.5 23.2 7.3 4.5 l r
Tide  26.5 19.9 6.3 42 o r
Tide  19.3 14.6 4.7 17 o r
---Little Debbie---
Figaroos  13.5 18.6 3.7 12 o y
Swiss Cake Rolls  10.1 21.8 5.8 13 o w
Fudge Brownies  11 30.8 2.5 12 o w
Marshmallow Supremes  9.4 32 7 10 o w
Apple Delights  11.2 30.1 4.9 15 o w
Snack Cakes  13.4 32 3.4 13 o b
Nutty Bar  13.2 18.5 4.2 12 o y
Lemon Stix  13.2 18.5 4.2 9 o w
Fudge Rounds  8.1 28.3 5.4 9.5 o w
---Tea---
Celestial Seasonings Mint Magic  7.8 13.8 6.3 49 g b
Celestial Seasonings Cranberry Cove  7.8 13.8 6.3 46 g r
Celestial Seasonings Sleepy Time  7.8 13.8 6.3 37 g g
Celestial Seasonings Lemon Zinger  7.8 13.8 6.3 56 g y
Bigelow Lemon Lift  7.7 13.4 6.9 40 g y
Bigelow Plantation Mint  7.7 13.4 6.9 35 g g
Bigelow Earl Grey  7.7 13.4 6.9 35 g b
Luzianne  8.9 22.8 6.4 6 o r
Luzianne  18.4 20.2 6.9 8 o r
Luzianne Decaffeinated  8.9 22.8 6.4 5.25 o g
Lipton Tea Bags  17.1 20 6.7 8 o r
Lipton Tea Bags  11.5 14.4 6.6 3.75 o r
Lipton Tea Bags  6.7 10 5.7 1.25 o r
Lipton Family Size Tea Bags  13.7 24 9 12 o r
Lipton Family Size Tea Bags  8.7 20.8 8.2 6 o r
Lipton Family Size Tea Bags  8.9 11.1 8.2 3 o r
Lipton Loose Tea  12.7 10.9 5.4 8 o r
---Paste, Tooth---
Colgate  4.4 22 3.5 7 o r
Colgate  3.6 15.6 3.3 3 o r
Colgate  4.2 18.3 3.5 5 o r
Crest  4.3 21.7 3.7 6.4 o w
Crest  4.3 17.4 3.6 4.6 o w
Crest  3.5 15.2 3.2 2.7 o w
Crest  3.0 10.9 2.8 .85 o w
Arm & Hammer  4.4 17 3.7 5 o w
;
/*--- GROCERY: copy of GROCERY2 with NAME shortened.              ---*/
/*--- Declaring the length before SET fixes NAME at 16 characters, ---*/
/*--- so longer product names are truncated.                       ---*/
data grocery;
   length name $16;
   set grocery2;
run;
/*--- $COLOR: maps the one-letter color codes to color names ---*/
proc format;
   value $color
      'w' = 'White'
      'y' = 'Yellow'
      'r' = 'Red'
      'g' = 'Green'
      'b' = 'Blue';
run;
/*--- global switches read by the macros below (1=on, 0=off) ---*/

/* show CLUSTER output */
%let cluster=1;

/* print TREE diagram */
%let tree=0;

/* list the members of each cluster */
%let list=0;

/* crosstabulate clusters and classes */
%let crosstab=1;

/* crosstabulate clusters and colors */
%let crosscol=0;
/*--- TREEOPT: expands to the option text spliced into the ---*/
/*--- PROC TREE statement, chosen by the &tree switch      ---*/
%macro treeopt;
   /* &tree true: diagram options; otherwise suppress printed output */
   %if &tree %then h page=1;
   %else noprint;
%mend treeopt;
/*--- CLUSOPT: expands to the option text spliced into the ---*/
/*--- PROC CLUSTER statement, chosen by the &cluster switch ---*/
%macro clusopt;
   /* &cluster true: request the PSEUDO and CCC statistics with P=20; */
   /* otherwise suppress printed output                               */
   %if &cluster %then pseudo ccc p=20;
   %else noprint;
%mend clusopt;
/*------ two macros for showing cluster results ------*/

/* SHOW(n): cut the hierarchy stored in data set TREE into n clusters, */
/* write the cluster memberships to data set OUT and, when &list is    */
/* true, print the boxes grouped by cluster.                           */
%macro show(n);   /* n=number of clusters
                       to show results for */
   proc tree data=tree %treeopt n=&n out=out;
      id name;
      copy class height width depth weight color;
   run;

   %if &list %then %do;
      /* neither step names a data set, so both operate on the */
      /* most recently created one                             */
      proc sort;
         by cluster;
      run;

      proc print;
         var class name height width depth weight color;
         by cluster clusname;
      run;
   %end;
%mend show;
/* SHOW2: optional crosstabulations of the cluster assignment, each   */
/* with a frequency plot.  PROC FREQ is given no DATA= option, so it  */
/* reads the most recently created data set.                          */
%macro show2;
   /* clusters versus product category */
   %if &crosstab %then %do;
      ods graphics on;
      proc freq;
         tables class*cluster / plots=freqplot;
      run;
      ods graphics off;
   %end;

   /* clusters versus box color */
   %if &crosscol %then %do;
      ods graphics on;
      proc freq;
         tables color*cluster / plots=freqplot;
      run;
      ods graphics off;
   %end;
%mend show2;
/**********************************************************/
/*                                                        */
/* Analysis 1: standardized box measurements              */
/*                                                        */
/**********************************************************/
title2 'Analysis 1: Standardized data';

/* centroid clustering of the four raw measurements; the STANDARD */
/* option standardizes them inside PROC CLUSTER                   */
proc cluster data=grocery method=centroid standard %clusopt outtree=tree;
   var height width depth weight;
   copy name class color;
run;

/* report the 10-cluster solution */
%show(10);
%show2;
/**********************************************************/
/*                                                        */
/* Analysis 2: standardized row-centered logarithms       */
/*                                                        */
/**********************************************************/
title2 'Row-centered logarithms';

/* SHAPE: adds l_height, l_width, l_depth and l_weight, the logs of */
/* the measurements with each row's mean log subtracted.  WEIGHT    */
/* itself is replaced by its cube root.                             */
data shape;
   set grocery;
   array x{4} height width depth weight;
   array l{4} l_height l_width l_depth l_weight;
                              /* logarithms */
   drop j;                    /* loop index only */

   weight = weight**(1/3);    /* take cube root to conform with
                                 the other linear measurements */

   do j = 1 to dim(l);        /* take logarithms */
      l{j} = log(x{j});
   end;

   mean = mean(of l{*});      /* find row mean of logarithms */

   do j = 1 to dim(l);
      l{j} = l{j} - mean;     /* center row */
   end;
run;
title2 'Analysis 2: Standardized row-centered logarithms';

/* give each centered log variable mean 0 and standard deviation 1 */
proc standard data=shape out=shapstan mean=0 std=1;
   var l_height l_width l_depth l_weight;
run;

/* centroid clustering on the standardized shape variables; the */
/* measurements are carried along only for reporting            */
proc cluster data=shapstan method=centroid %clusopt outtree=tree;
   var l_height l_width l_depth l_weight;
   copy name class height width depth weight color;
run;

/* report the 8-cluster solution */
%show(8);
%show2;
/**********************************************************/
/*                                                        */
/* Analysis 3: standardized row-standardized logarithms   */
/*                                                        */
/**********************************************************/

/* from here on, also list cluster members and crosstabulate colors */
%let list=1;
%let crosscol=1;

title2 'Row-standardized logarithms';

/* STD: adds l_height, l_width, l_depth and l_weight, the logs of the */
/* measurements standardized within each row (row mean subtracted,    */
/* divided by row standard deviation).  WEIGHT itself is replaced by  */
/* its cube root.                                                     */
data std;
   set grocery;
   array x{4} height width depth weight;
   array l{4} l_height l_width l_depth l_weight;
                              /* logarithms */
   drop j;                    /* loop index only */

   weight = weight**(1/3);    /* take cube root to conform with
                                 the other linear measurements */

   do j = 1 to dim(l);
      l{j} = log(x{j});       /* take logarithms */
   end;

   mean = mean(of l{*});      /* find row mean of logarithms */
   std  = std(of l{*});       /* find row standard deviation */

   do j = 1 to dim(l);
      l{j} = (l{j} - mean)/std;   /* standardize row */
   end;
run;
title2 'Analysis 3: Standardized row-standardized logarithms';

/* give each row-standardized log variable mean 0 and std 1 */
proc standard data=std out=stdstan mean=0 std=1;
   var l_height l_width l_depth l_weight;
run;

/* centroid clustering on the standardized variables; the */
/* measurements are carried along only for reporting      */
proc cluster data=stdstan method=centroid %clusopt outtree=tree;
   var l_height l_width l_depth l_weight;
   copy name class height width depth weight color;
run;

/* report the 7-cluster solution */
%show(7);
%show2;
/************************************************************/
/*                                                          */
/* Analyses 4-7: standardized row-standardized logs & color */
/*                                                          */
/************************************************************/

/* stop listing cluster members; keep the color crosstabulation */
%let list=0;
%let crosscol=1;

/* The title literal is kept on one source line: when a quoted string */
/* spans lines, the text at the break depends on how the host joins   */
/* the records.                                                       */
title2 'Analysis 4: Standardized row-standardized logarithms and color (s=.2)';

/* rescale the color dummies to mean 0, std .2 (in place in STDSTAN); */
/* the l_ variables keep the std they already have in STDSTAN         */
proc standard data=stdstan out=stdstan m=0 s=.2;
   var c_:;
run;

/* centroid clustering on the shape variables plus the color dummies */
proc cluster data=stdstan m=cen %clusopt outtree=tree;
   var l_height l_width l_depth l_weight c_:;
   copy name class height width depth weight color;
run;

/* report the 7-cluster solution */
%show(7);
%show2;
/* The title literal is kept on one source line: when a quoted string */
/* spans lines, the text at the break depends on how the host joins   */
/* the records.                                                       */
title2 'Analysis 5: Standardized row-standardized logarithms and color (s=.3)';

/* rescale the color dummies to mean 0, std .3 (in place in STDSTAN) */
proc standard data=stdstan out=stdstan m=0 s=.3;
   var c_:;
run;

/* centroid clustering on the shape variables plus the color dummies */
proc cluster data=stdstan m=cen %clusopt outtree=tree;
   var l_height l_width l_depth l_weight c_:;
   copy name class height width depth weight color;
run;

/* report the 6-cluster solution */
%show(6);
%show2;
/* The title literal is kept on one source line: when a quoted string */
/* spans lines, the text at the break depends on how the host joins   */
/* the records.                                                       */
title2 'Analysis 6: Standardized row-standardized logarithms and color (s=.4)';

/* rescale the color dummies to mean 0, std .4 (in place in STDSTAN) */
proc standard data=stdstan out=stdstan m=0 s=.4;
   var c_:;
run;

/* centroid clustering on the shape variables plus the color dummies */
proc cluster data=stdstan m=cen %clusopt outtree=tree;
   var l_height l_width l_depth l_weight c_:;
   copy name class height width depth weight color;
run;

/* report the 3-cluster solution */
%show(3);
%show2;
/* The title literal is kept on one source line: when a quoted string */
/* spans lines, the text at the break depends on how the host joins   */
/* the records.                                                       */
title2 'Analysis 7: Standardized row-standardized logarithms and color (s=.8)';

/* rescale the color dummies to mean 0, std .8 (in place in STDSTAN) */
proc standard data=stdstan out=stdstan m=0 s=.8;
   var c_:;
run;

/* centroid clustering on the shape variables plus the color dummies */
proc cluster data=stdstan m=cen %clusopt outtree=tree;
   var l_height l_width l_depth l_weight c_:;
   copy name class height width depth weight color;
run;

/* report the 10-cluster solution */
%show(10);
%show2;