Cluster Analysis of Breakfast Cereals

/*
 * Session setup.
 * The DM command clears the Output and Log windows. OPTIONS sets the
 * listing page geometry (PS = page size, LS = line size), restarts page
 * numbering at 1, suppresses the date, turns on MPRINT, and chooses plain
 * ASCII characters for table borders. GOPTIONS resets all graphics
 * settings first and then sets title/text height and font.
 */
DM "output;clear;log;clear";

Options PageNo=1 NoDate MPrint
        PS=55 LS=80
        FORMCHAR='|----|+|---+=|-/\<>*';

GOptions Reset=ALL
         FTitle=Swiss HTitle=1
         FText=Swiss  HText=1
         NoPrompt;

/*
 * PDF code
 *
ODS Listing Close;
Filename GSASFile Dummy;
GOptions Device=PDF FText=Helvetica FTitle=Helvetica;
Options TopMargin=1 BottomMargin=1 LeftMargin=1.0 RightMargin=1.0;
ODS PDF File="BreakfastCereals.pdf";
*/

/*
 * HTML code
 * Remove length specifications in axes statements.
 *
ODS Listing Close;
ODS HTML body=    "BreakfastCereals.html" 
         headtext="<title>Cluster Analysis of Cereals</title>"
         gpath="BCereal"
         anchor="BCereal";
GOptions Device=GIF Transparency NoBorder
         HText=1 FText=Swiss HTitle=1 FTitle=Swiss;
*/

Title1 'Breakfast Cereals - J&W Table 11.9';

/*
 * Read one record per cereal: brand name, one-letter manufacturer code,
 * eight nutrient measurements, and a numeric group code.
 *
 * A short plotting label (at most 4 characters) is derived from the brand:
 *   - strip the lowercase letters, leaving capitals and digits
 *     (HoneyNutCheerios becomes HNC, Product19 becomes P19);
 *   - if only one character survives, use the first four characters
 *     of the brand instead (Kix stays Kix, Cheerios becomes Chee);
 *   - CocoaPuffs and CornPops both reduce to CP, so CocoaPuffs is
 *     relabelled Coco to keep the labels unique.
 *
 * NOTE(review): the brand spelled Cheaties in the data lines is probably
 * a misspelling of Wheaties. Confirm against the source table before
 * changing it, because the derived label and every sorted listing depend
 * on the spelling.
 */
Data Cereals;
 Length Brand        $23
        Label        $4
        Manufacturer $1;
 Input Brand $ Manufacturer $
       Calories Protein Fat Sodium Fiber Carbohydrates Sugar Potassium
       Group;
 Label = Compress(Brand, "abcdefghijklmnopqrstuvwxyz");
 If Length(Label) = 1 Then Label = Substr(Brand, 1, 4);
 Else If Label = "CP" And Brand = "CocoaPuffs" Then Label = "Coco";
Datalines;
ACCheerios  G  110  2  2  180  1.5  10.5  10  70  1
Cheerios  G  110  6  2  290  2.0  17.0  1  105  1
CocoaPuffs  G  110  1  1  180  0.0  12.0  13  55  1
CountChocula  G  110  1  1  180  0.0  12.0  13  65  1
GoldenGrahams  G  110  1  1  280  0.0  15.0  9  45  1
HoneyNutCheerios  G  110  3  1  250  1.5  11.5  10  90  1
Kix  G  110  2  1  260  0.0  21.0  3  40  1
LuckyCharms  G  110  2  1  180  0.0  12.0  12  55  1
MultiGrainCheerios  G  100  2  1  220  2.0  15.0  6  90  1
OatmealRaisinCrisp  G  130  3  2  170  1.5  13.5  10  120  1
RaisinNutBran  G  100  3  2  140  2.5  10.5  8  140  1
TotalCornFlakes  G  110  2  1  200  0.0  21.0  3  35  1
TotalRaisinBran  G  140  3  1  190  4.0  15.0  14  230  1
TotalWholeGrain  G  100  3  1  200  3.0  16.0  3  110  1
Trix  G  110  1  1  140  0.0  13.0  12  25  1
Cheaties  G  100  3  1  200  3.0  17.0  3  110  1
WheatiesHoneyGold  G  110  2  1  200  1.0  16.0  8  60  1
AllBran  K  70  4  1  260  9.0  7.0  5  320  2
AppleJacks  K  110  2  0  125  1.0  11.0  14  30  2
CornFlakes  K  100  2  0  290  1.0  21.0  2  35  2
CornPops  K  110  1  0  90  1.0  13.0  12  20  2
CracklinOatBran  K  110  3  3  140  4.0  10.0  7  160  2
Crispix  K  110  2  0  220  1.0  21.0  3  30  2
FrootLoops  K  110  2  1  125  1.0  11.0  13  30  2
FrostedFlakes  K  110  1  0  200  1.0  14.0  11  25  2
FrostedMiniWheats  K  100  3  0  0  3.0  14.0  7  100  2
FruitfulBran  K  120  3  0  240  5.0  14.0  12  190  2
JustRightCrunchyNuggets  K  110  2  1  170  1.0  17.0  6  60  2
MueslixCrispyBlend  K  160  3  2  150  3.0  17.0  13  160  2
NutNHoneyCrunch  K  120  2  1  190  0.0  15.0  9  40  2
NutriGrainAlmondRaisin  K  140  3  2  220  3.0  21.0  7  130  2
NutriGrainWheat  K  90  3  0  170  3.0  18.0  2  90  2
Product19  K  100  3  0  320  1.0  20.0  3  45  2
RaisinBran  K  120  3  1  210  5.0  14.0  12  240  2
RiceKrispies  K  110  2  0  290  0.0  22.0  3  35  2
Smacks  K  110  2  1  70  1.0  9.0  15  40  2
SpecialK  K  110  6  0  230  1.0  16.0  3  55  2
CapNCrunch  Q  120  1  2  220  0.0  12.0  12  35  3
HoneyGrahamOhs  Q  120  1  2  220  1.0  12.0  11  45  3
Life  Q  100  4  2  150  2.0  12.0  6  95  3
PuffedRice  Q  50  1  0  0  0.0  13.0  0  15  3
PuffedWheat  Q  50  2  0  0  1.0  10.0  0  50  3
QuakerOatmeal  Q  100  5  2  0  2.7  1.0  1  110  3
;

/*
 * List the raw data. The abbreviated column headings (Na, K, Carbs)
 * exist only so that each record fits on a single output line.
 */
Proc Print Data=Cereals Label;
 Label Sodium="Na"
       Potassium="K"
       Carbohydrates="Carbs";
 Var Brand Label
     Calories Protein Fat Sodium Fiber Carbohydrates Sugar Potassium;
Run;

/* Mean, standard deviation, minimum and maximum of the eight nutrients. */
Title2 "Summary Statistics";
Proc Means Mean Std Min Max Data=Cereals;
 Var Calories Protein Fat Sodium Fiber Carbohydrates Sugar Potassium;
Run;

/*
 * Standardize each nutrient to mean 0 and standard deviation 1 before
 * clustering. Every variable then carries equal weight, because
 * distances are measured in standard deviation units instead of the
 * original units (g, mg, KCal). The standardized copy is written to
 * STDCEREALS, which is the input to the cluster analyses.
 */
Proc Standard Mean=0 Std=1
              Data=Cereals
              Out=StdCereals;
 Var Calories Protein Fat Sodium Fiber Carbohydrates Sugar Potassium;
Run;

/*
 * Four hierarchical methods are run in turn on the standardized data:
 * single, average and complete linkage, and finally the Ward method.
 * Each run writes its tree to TREEDATA, plots the merge height
 * (_HEIGHT_) against the number of clusters (_NCL_), and then draws
 * the dendrogram with the short cereal labels.
 */
Title2 "Single Linkage Cluster Analysis";
Proc Cluster Data=StdCereals
             Method=Single
             OutTree=TreeData;
 Id Label;
 Var Calories Protein Fat Sodium Fiber Carbohydrates Sugar Potassium;
Run;

/* Start from clean SYMBOL and AXIS definitions for this method. */
GOptions Reset=Symbol Reset=Axis;
Axis1 Label=(Angle=90);
Symbol1 Color=Black Value=Dot Interpol=SplineS;

Proc GPlot Data=TreeData;
 Plot _HEIGHT_ * _NCL_ = 1 / VAxis=Axis1;
Run;
Quit;

/* Dendrogram; the vertical axis label is rotated 90 degrees. */
Axis1 Label=(Angle=90);
Proc Tree Data=TreeData VAxis=Axis1;
 Id Label;
Run;
 
/*
 * Average linkage on the standardized cereals: cluster, plot merge
 * height against number of clusters, then draw the dendrogram.
 */
Title2 "Average Linkage Cluster Analysis";
Proc Cluster Data=StdCereals
             Method=Average
             OutTree=TreeData;
 Id Label;
 Var Calories Protein Fat Sodium Fiber Carbohydrates Sugar Potassium;
Run;

/* Start from clean SYMBOL and AXIS definitions for this method. */
GOptions Reset=Symbol Reset=Axis;
Axis1 Label=(Angle=90);
Symbol1 Color=Black Value=Dot Interpol=SplineS;

Proc GPlot Data=TreeData;
 Plot _HEIGHT_ * _NCL_ = 1 / VAxis=Axis1;
Run;
Quit;

/* Dendrogram; the vertical axis label is rotated 90 degrees. */
Axis1 Label=(Angle=90);
Proc Tree Data=TreeData VAxis=Axis1;
 Id Label;
Run;
 
/*
 * Complete linkage on the standardized cereals: cluster, plot merge
 * height against number of clusters, then draw the dendrogram.
 */
Title2 "Complete Linkage Cluster Analysis";
Proc Cluster Data=StdCereals
             Method=Complete
             OutTree=TreeData;
 Id Label;
 Var Calories Protein Fat Sodium Fiber Carbohydrates Sugar Potassium;
Run;

/* Start from clean SYMBOL and AXIS definitions for this method. */
GOptions Reset=Symbol Reset=Axis;
Axis1 Label=(Angle=90);
Symbol1 Color=Black Value=Dot Interpol=SplineS;

Proc GPlot Data=TreeData;
 Plot _HEIGHT_ * _NCL_ = 1 / VAxis=Axis1;
Run;
Quit;

/* Dendrogram; the vertical axis label is rotated 90 degrees. */
Axis1 Label=(Angle=90);
Proc Tree Data=TreeData VAxis=Axis1;
 Id Label;
Run;
 
/*
 * Ward minimum variance method on the standardized cereals. In addition
 * to the height plot and the dendrogram, PROC TREE cuts the tree at
 * three clusters and saves the assignments in CLUSTERS for the report
 * and the principal component plots.
 */
Title2 "Ward's Method Cluster Analysis";
Proc Cluster Data=StdCereals
             Method=Ward
             OutTree=TreeData;
 Id Label;
 Var Calories Protein Fat Sodium Fiber Carbohydrates Sugar Potassium;
Run;

/* Start from clean SYMBOL and AXIS definitions for this method. */
GOptions Reset=Symbol Reset=Axis;
Axis1 Label=(Angle=90);
Symbol1 Color=Black Value=Dot Interpol=SplineS;

Proc GPlot Data=TreeData;
 Plot _HEIGHT_ * _NCL_ = 1 / VAxis=Axis1;
Run;
Quit;

/* Dendrogram plus the three-cluster assignment data set. */
Axis1 Label=(Angle=90);
Proc Tree Data=TreeData
          Out=Clusters
          NClusters=3
          VAxis=Axis1;
 Id Label;
Run;

/*
 * Attach the cluster assignments to the original (unstandardized)
 * cereal data. LABEL is present in both data sets and is unique to each
 * cereal, so both are sorted by LABEL and match-merged on it.
 * CLUSTERC is a one-character copy of the numeric cluster number, and
 * the CLUSNAME variable from PROC TREE is not kept.
 */
Proc Sort Data=Clusters;
 By Label;
Run;

Proc Sort Data=Cereals;
 By Label;
Run;

Data CerealClusters (Drop=ClusName);
 Length ClusterC $1;      /* declared first, ahead of the merged variables */
 Merge Cereals
       Clusters;
 By Label;
 ClusterC = Put(Cluster, 1.);
Run;

/*
 * Report the cereals grouped by cluster and alphabetically by brand
 * within each cluster. The abbreviated column headings keep each
 * record on a single output line.
 */
Proc Sort Data=CerealClusters;
 By Cluster Brand;
Run;

Title3 "Cereals Assigned To One of Three Clusters";
Proc Print Data=CerealClusters Label;
 Label Cluster="Cl#"
       Calories="Cal"
       Sodium="Na"
       Potassium="K"
       Carbohydrates="Carbs";
 Var Cluster Brand Label
     Calories Protein Fat Sodium Fiber Carbohydrates Sugar Potassium;
Run;
Title3;

/*
 * A second, graphical view of the clusters for continuous variables:
 * run a principal components analysis, plot the cereals in the space
 * of the leading components, and mark each point with the cluster it
 * was assigned to. The clustering used standardized variables, so the
 * PCA should be based on the correlation matrix; no covariance option
 * is requested here, and the listing reports eigenvalues of the
 * correlation matrix. Component scores are saved in COMPS.
 */
Title2 "Principal Components Analysis";
Proc Princomp Data=CerealClusters
              Out=Comps;
 Var Calories Protein Fat Sodium Fiber Carbohydrates Sugar Potassium;
Run;

/*
 * Component 2 against component 1. One SYMBOL definition per cluster:
 * each sets marker shape and color, explicitly turns interpolation off,
 * and prints the short cereal label next to the point.
 */
Title3 "Symbols and Colors Indicate Cluster Membership (Ward's Method)";

Axis1 Length=5.75in
      Order=(-4 To 6 By 1)
      Label=(Angle=90 "Principal Component 2");
Axis2 Length=5.75in
      Order=(-4 To 6 By 1)
      Label=("Principal Component 1");

Symbol1 Value=Circle   Color=Black Interpol=None
        Pointlabel=(C=Black H=0.75 "#Label");
Symbol2 Value=Triangle Color=Blue  Interpol=None
        Pointlabel=(C=Blue  H=0.75 "#Label");
Symbol3 Value=Square   Color=Red   Interpol=None
        Pointlabel=(C=Red   H=0.75 "#Label");

Proc GPlot Data=Comps;
 Plot Prin2 * Prin1 = Cluster / HAxis=Axis2 VAxis=Axis1;
Run;
Quit;

/*
 * Component 3 against component 1, with the same per-cluster markers,
 * colors and point labels.
 */
Axis1 Length=5.75in
      Order=(-4 To 6 By 1)
      Label=(Angle=90 "Principal Component 3");
Axis2 Length=5.75in
      Order=(-4 To 6 By 1)
      Label=("Principal Component 1");

Symbol1 Value=Circle   Color=Black Interpol=None
        Pointlabel=(C=Black H=0.75 "#Label");
Symbol2 Value=Triangle Color=Blue  Interpol=None
        Pointlabel=(C=Blue  H=0.75 "#Label");
Symbol3 Value=Square   Color=Red   Interpol=None
        Pointlabel=(C=Red   H=0.75 "#Label");

Proc GPlot Data=Comps;
 Plot Prin3 * Prin1 = Cluster / HAxis=Axis2 VAxis=Axis1;
Run;
Quit;

/*
 * Component 3 against component 2, with the same per-cluster markers,
 * colors and point labels. The third title line is cleared afterwards.
 */
Axis1 Length=5.75in
      Order=(-4 To 6 By 1)
      Label=(Angle=90 "Principal Component 3");
Axis2 Length=5.75in
      Order=(-4 To 6 By 1)
      Label=("Principal Component 2");

Symbol1 Value=Circle   Color=Black Interpol=None
        Pointlabel=(C=Black H=0.75 "#Label");
Symbol2 Value=Triangle Color=Blue  Interpol=None
        Pointlabel=(C=Blue  H=0.75 "#Label");
Symbol3 Value=Square   Color=Red   Interpol=None
        Pointlabel=(C=Red   H=0.75 "#Label");

Proc GPlot Data=Comps;
 Plot Prin3 * Prin2 = Cluster / HAxis=Axis2 VAxis=Axis1;
Run;
Quit;
Title3;

*ODS PDF Close;
*ODS HTML Close;
*ODS Listing;


 
Breakfast Cereals - J&W Table 11.9

Obs Brand Label Calories Protein Fat Na Fiber Carbs Sugar K
1 ACCheerios ACC 110 2 2 180 1.5 10.5 10 70
2 Cheerios Chee 110 6 2 290 2.0 17.0 1 105
3 CocoaPuffs Coco 110 1 1 180 0.0 12.0 13 55
4 CountChocula CC 110 1 1 180 0.0 12.0 13 65
5 GoldenGrahams GG 110 1 1 280 0.0 15.0 9 45
6 HoneyNutCheerios HNC 110 3 1 250 1.5 11.5 10 90
7 Kix Kix 110 2 1 260 0.0 21.0 3 40
8 LuckyCharms LC 110 2 1 180 0.0 12.0 12 55
9 MultiGrainCheerios MGC 100 2 1 220 2.0 15.0 6 90
10 OatmealRaisinCrisp ORC 130 3 2 170 1.5 13.5 10 120
11 RaisinNutBran RNB 100 3 2 140 2.5 10.5 8 140
12 TotalCornFlakes TCF 110 2 1 200 0.0 21.0 3 35
13 TotalRaisinBran TRB 140 3 1 190 4.0 15.0 14 230
14 TotalWholeGrain TWG 100 3 1 200 3.0 16.0 3 110
15 Trix Trix 110 1 1 140 0.0 13.0 12 25
16 Cheaties Chea 100 3 1 200 3.0 17.0 3 110
17 WheatiesHoneyGold WHG 110 2 1 200 1.0 16.0 8 60
18 AllBran AB 70 4 1 260 9.0 7.0 5 320
19 AppleJacks AJ 110 2 0 125 1.0 11.0 14 30
20 CornFlakes CF 100 2 0 290 1.0 21.0 2 35
21 CornPops CP 110 1 0 90 1.0 13.0 12 20
22 CracklinOatBran COB 110 3 3 140 4.0 10.0 7 160
23 Crispix Cris 110 2 0 220 1.0 21.0 3 30
24 FrootLoops FL 110 2 1 125 1.0 11.0 13 30
25 FrostedFlakes FF 110 1 0 200 1.0 14.0 11 25
26 FrostedMiniWheats FMW 100 3 0 0 3.0 14.0 7 100
27 FruitfulBran FB 120 3 0 240 5.0 14.0 12 190
28 JustRightCrunchyNuggets JRCN 110 2 1 170 1.0 17.0 6 60
29 MueslixCrispyBlend MCB 160 3 2 150 3.0 17.0 13 160
30 NutNHoneyCrunch NNHC 120 2 1 190 0.0 15.0 9 40
31 NutriGrainAlmondRaisin NGAR 140 3 2 220 3.0 21.0 7 130
32 NutriGrainWheat NGW 90 3 0 170 3.0 18.0 2 90
33 Product19 P19 100 3 0 320 1.0 20.0 3 45
34 RaisinBran RB 120 3 1 210 5.0 14.0 12 240
35 RiceKrispies RK 110 2 0 290 0.0 22.0 3 35
36 Smacks Smac 110 2 1 70 1.0 9.0 15 40
37 SpecialK SK 110 6 0 230 1.0 16.0 3 55
38 CapNCrunch CNC 120 1 2 220 0.0 12.0 12 35
39 HoneyGrahamOhs HGO 120 1 2 220 1.0 12.0 11 45
40 Life Life 100 4 2 150 2.0 12.0 6 95
41 PuffedRice PR 50 1 0 0 0.0 13.0 0 15
42 PuffedWheat PW 50 2 0 0 1.0 10.0 0 50
43 QuakerOatmeal QO 100 5 2 0 2.7 1.0 1 110

 


 
Breakfast Cereals - J&W Table 11.9
Summary Statistics

The MEANS Procedure

Variable Mean Std Dev Minimum Maximum
Calories 107.9069767 18.9684123 50.0000000 160.0000000
Protein 2.4651163 1.2218027 1.0000000 6.0000000
Fat 0.9767442 0.8014384 0 3.0000000
Sodium 180.4651163 79.2134996 0 320.0000000
Fiber 1.7139535 1.7992831 0 9.0000000
Carbohydrates 14.2558140 4.2571678 1.0000000 22.0000000
Sugar 7.6046512 4.5363061 0 15.0000000
Potassium 84.4186047 66.1096609 15.0000000 320.0000000

 


 
Breakfast Cereals - J&W Table 11.9
Single Linkage Cluster Analysis

The CLUSTER Procedure
Single Linkage Cluster Analysis

Eigenvalues of the Covariance Matrix
  Eigenvalue Difference Proportion Cumulative
1 2.54756944 0.69280765 0.3184 0.3184
2 1.85476179 0.08593184 0.2318 0.5503
3 1.76882996 0.90054660 0.2211 0.7714
4 0.86828336 0.37098255 0.1085 0.8799
5 0.49730081 0.13987236 0.0622 0.9421
6 0.35742845 0.29692509 0.0447 0.9868
7 0.06050337 0.01518055 0.0076 0.9943
8 0.04532281   0.0057 1.0000

Root-Mean-Square Total-Sample Standard Deviation = 1

Mean Distance Between Observations = 3.711894

 

Cluster History
NCL Clusters Joined FREQ Norm Min Dist Tie
42 Coco CC 2 0.0408  
41 TWG Chea 2 0.0633  
40 CNC HGO 2 0.1662  
39 WHG JRCN 2 0.1689  
38 CL42 Trix 3 0.2024  
37 Kix TCF 2 0.2051  
36 CF RK 2 0.2239  
35 CL38 LC 4 0.2284  
34 CL39 NNHC 3 0.2407  
33 FL Smac 2 0.2584  
32 CL36 P19 3 0.2612  
31 CL35 CL33 6 0.2744  
30 MGC CL34 4 0.2833  
29 CL32 Cris 4 0.2842  
28 CL31 CL30 10 0.3047  
27 AJ CP 2 0.3075  
26 RNB Life 2 0.3349  
25 ACC CL40 3 0.3403  
24 CL28 CL27 12 0.3414  
23 CL24 CL41 14 0.3436  
22 PR PW 2 0.357  
21 TRB RB 2 0.3571  
20 CL37 CL29 6 0.3575  
19 CL23 CL20 20 0.3729  
18 CL19 FF 21 0.3846  
17 CL18 GG 22 0.3954  
16 CL17 NGW 23 0.3972  
15 CL25 CL16 26 0.4023  
14 CL21 FB 3 0.4061  
13 CL15 HNC 27 0.4362  
12 CL26 COB 3 0.4413  
11 CL13 CL12 30 0.453  
10 CL11 ORC 31 0.4561  
9 CL14 MCB 4 0.5792  
8 CL9 NGAR 5 0.586  
7 CL10 CL8 36 0.5865  
6 CL7 FMW 37 0.713  
5 Chee SK 2 0.7587  
4 CL6 CL5 39 0.7856  
3 CL4 QO 40 0.9467  
2 CL3 CL22 42 0.9597  
1 CL2 AB 43 1.1899  

 


 

 


 
The TREE Procedure
Single Linkage Cluster Analysis

 


 
Breakfast Cereals - J&W Table 11.9
Average Linkage Cluster Analysis

The CLUSTER Procedure
Average Linkage Cluster Analysis

Eigenvalues of the Covariance Matrix
  Eigenvalue Difference Proportion Cumulative
1 2.54756944 0.69280765 0.3184 0.3184
2 1.85476179 0.08593184 0.2318 0.5503
3 1.76882996 0.90054660 0.2211 0.7714
4 0.86828336 0.37098255 0.1085 0.8799
5 0.49730081 0.13987236 0.0622 0.9421
6 0.35742845 0.29692509 0.0447 0.9868
7 0.06050337 0.01518055 0.0076 0.9943
8 0.04532281   0.0057 1.0000

Root-Mean-Square Total-Sample Standard Deviation = 1

Root-Mean-Square Distance Between Observations = 4

 

Cluster History
NCL Clusters Joined FREQ Norm RMS Dist Tie
42 Coco CC 2 0.0378  
41 TWG Chea 2 0.0587  
40 CNC HGO 2 0.1542  
39 WHG JRCN 2 0.1567  
38 Kix TCF 2 0.1903  
37 CL42 Trix 3 0.2007  
36 CF RK 2 0.2078  
35 CL37 LC 4 0.2348  
34 FL Smac 2 0.2398  
33 CL39 NNHC 3 0.2621  
32 CL36 Cris 3 0.266  
31 AJ CP 2 0.2854  
30 RNB Life 2 0.3107  
29 MGC CL41 3 0.3269  
28 CL32 P19 4 0.3301  
27 PR PW 2 0.3313  
26 TRB RB 2 0.3314  
25 ACC CL40 3 0.3576  
24 CL31 FF 3 0.3768  
23 CL35 CL34 6 0.3906  
22 GG CL33 4 0.4086  
21 CL38 CL28 6 0.4258  
20 CL29 NGW 4 0.433  
19 CL26 FB 3 0.4424  
18 CL23 CL24 9 0.4498  
17 CL22 HNC 5 0.489  
16 CL30 COB 3 0.4898  
15 CL25 CL17 8 0.5279  
14 MCB NGAR 2 0.5438  
13 ORC CL16 4 0.5461  
12 CL15 CL18 17 0.5643  
11 CL21 CL20 10 0.6476  
10 Chee SK 2 0.7041  
9 CL19 CL14 5 0.7975  
8 CL12 CL13 21 0.8309  
7 CL8 CL11 31 0.8726  
6 CL7 FMW 32 0.936  
5 CL6 CL9 37 1.031  
4 CL5 CL10 39 1.1179  
3 CL4 CL27 41 1.3636  
2 CL3 QO 42 1.4124  
1 CL2 AB 43 1.7291  

 


 

 


 
The TREE Procedure
Average Linkage Cluster Analysis

 


 
Breakfast Cereals - J&W Table 11.9
Complete Linkage Cluster Analysis

The CLUSTER Procedure
Complete Linkage Cluster Analysis

Eigenvalues of the Covariance Matrix
  Eigenvalue Difference Proportion Cumulative
1 2.54756944 0.69280765 0.3184 0.3184
2 1.85476179 0.08593184 0.2318 0.5503
3 1.76882996 0.90054660 0.2211 0.7714
4 0.86828336 0.37098255 0.1085 0.8799
5 0.49730081 0.13987236 0.0622 0.9421
6 0.35742845 0.29692509 0.0447 0.9868
7 0.06050337 0.01518055 0.0076 0.9943
8 0.04532281   0.0057 1.0000

Root-Mean-Square Total-Sample Standard Deviation = 1

Mean Distance Between Observations = 3.711894

 

Cluster History
NCL Clusters Joined FREQ Norm Max Dist Tie
42 Coco CC 2 0.0408  
41 TWG Chea 2 0.0633  
40 CNC HGO 2 0.1662  
39 WHG JRCN 2 0.1689  
38 Kix TCF 2 0.2051  
37 CF RK 2 0.2239  
36 CL42 Trix 3 0.2294  
35 FL Smac 2 0.2584  
34 CL37 Cris 3 0.289  
33 CL36 LC 4 0.2934  
32 AJ CP 2 0.3075  
31 CL39 NNHC 3 0.3188  
30 RNB Life 2 0.3349  
29 PR PW 2 0.357  
28 TRB RB 2 0.3571  
27 MGC CL41 3 0.3607  
26 ACC CL40 3 0.4257  
25 CL32 FF 3 0.4265  
24 CL34 P19 4 0.4384  
23 GG CL31 4 0.5123  
22 CL33 CL35 6 0.5211  
21 CL28 FB 3 0.5382  
20 HNC ORC 2 0.5465  
19 CL27 NGW 4 0.5706  
18 MCB NGAR 2 0.586  
17 CL20 CL30 4 0.6008  
16 CL38 CL24 6 0.6135  
15 CL26 CL23 7 0.686  
14 CL22 CL25 9 0.7193  
13 Chee SK 2 0.7587  
12 CL16 CL19 10 0.8455  
11 CL15 CL14 16 0.9234  
10 CL17 COB 5 0.9242  
9 CL21 CL18 5 1.0063  
8 FMW CL29 3 1.0942  
7 CL10 CL9 10 1.1586  
6 CL13 CL12 12 1.2309  
5 CL11 CL7 26 1.3754  
4 CL8 QO 4 1.621  
3 CL5 CL6 38 1.6623  
2 CL3 CL4 42 2.1351  
1 CL2 AB 43 2.2375  

 


 

 


 
The TREE Procedure
Complete Linkage Cluster Analysis

 


 
Breakfast Cereals - J&W Table 11.9
Ward's Method Cluster Analysis

The CLUSTER Procedure
Ward's Minimum Variance Cluster Analysis

Eigenvalues of the Covariance Matrix
  Eigenvalue Difference Proportion Cumulative
1 2.54756944 0.69280765 0.3184 0.3184
2 1.85476179 0.08593184 0.2318 0.5503
3 1.76882996 0.90054660 0.2211 0.7714
4 0.86828336 0.37098255 0.1085 0.8799
5 0.49730081 0.13987236 0.0622 0.9421
6 0.35742845 0.29692509 0.0447 0.9868
7 0.06050337 0.01518055 0.0076 0.9943
8 0.04532281   0.0057 1.0000

Root-Mean-Square Total-Sample Standard Deviation = 1

Root-Mean-Square Distance Between Observations = 4

 

Cluster History
NCL Clusters Joined FREQ SPRSQ RSQ Tie
42 Coco CC 2 0.0000 1.00  
41 TWG Chea 2 0.0001 1.00  
40 CNC HGO 2 0.0006 .999  
39 WHG JRCN 2 0.0006 .999  
38 Kix TCF 2 0.0009 .998  
37 CF RK 2 0.0010 .997  
36 CL42 Trix 3 0.0013 .996  
35 FL Smac 2 0.0014 .994  
34 CL36 LC 4 0.0016 .993  
33 CL37 Cris 3 0.0019 .991  
32 AJ CP 2 0.0019 .989  
31 CL39 NNHC 3 0.0020 .987  
30 RNB Life 2 0.0023 .984  
29 PR PW 2 0.0026 .982  
28 TRB RB 2 0.0026 .979  
27 CL33 P19 4 0.0032 .976  
26 MGC CL41 3 0.0034 .973  
25 CL32 FF 3 0.0039 .969  
24 ACC CL40 3 0.0039 .965  
23 GG CL31 4 0.0053 .960  
22 CL28 FB 3 0.0053 .954  
21 CL26 NGW 4 0.0058 .948  
20 HNC ORC 2 0.0061 .942  
19 CL30 COB 3 0.0068 .935  
18 MCB NGAR 2 0.0070 .928  
17 CL34 CL35 6 0.0078 .921  
16 CL38 CL27 6 0.0089 .912  
15 CL17 CL25 9 0.0114 .900  
14 CL24 CL20 5 0.0115 .889  
13 Chee SK 2 0.0118 .877  
12 CL14 CL23 9 0.0164 .861  
11 CL21 FMW 5 0.0193 .841  
10 CL19 QO 4 0.0280 .813  
9 CL22 CL18 5 0.0289 .784  
8 CL12 CL15 18 0.0363 .748  
7 CL13 CL11 7 0.0426 .706  
6 CL7 CL16 13 0.0584 .647  
5 CL10 AB 5 0.0707 .576  
4 CL5 CL9 10 0.0905 .486  
3 CL6 CL29 15 0.1116 .374  
2 CL8 CL3 33 0.1690 .205  
1 CL2 CL4 43 0.2055 .000  

 


 

 


 
The TREE Procedure
Ward's Minimum Variance Cluster Analysis

 


 
Breakfast Cereals - J&W Table 11.9
Ward's Method Cluster Analysis
Cereals Assigned To One of Three Clusters

Obs Cl# Brand Label Cal Protein Fat Na Fiber Carbs Sugar K
1 1 ACCheerios ACC 110 2 2 180 1.5 10.5 10 70
2 1 AppleJacks AJ 110 2 0 125 1.0 11.0 14 30
3 1 CapNCrunch CNC 120 1 2 220 0.0 12.0 12 35
4 1 CocoaPuffs Coco 110 1 1 180 0.0 12.0 13 55
5 1 CornPops CP 110 1 0 90 1.0 13.0 12 20
6 1 CountChocula CC 110 1 1 180 0.0 12.0 13 65
7 1 FrootLoops FL 110 2 1 125 1.0 11.0 13 30
8 1 FrostedFlakes FF 110 1 0 200 1.0 14.0 11 25
9 1 GoldenGrahams GG 110 1 1 280 0.0 15.0 9 45
10 1 HoneyGrahamOhs HGO 120 1 2 220 1.0 12.0 11 45
11 1 HoneyNutCheerios HNC 110 3 1 250 1.5 11.5 10 90
12 1 JustRightCrunchyNuggets JRCN 110 2 1 170 1.0 17.0 6 60
13 1 LuckyCharms LC 110 2 1 180 0.0 12.0 12 55
14 1 NutNHoneyCrunch NNHC 120 2 1 190 0.0 15.0 9 40
15 1 OatmealRaisinCrisp ORC 130 3 2 170 1.5 13.5 10 120
16 1 Smacks Smac 110 2 1 70 1.0 9.0 15 40
17 1 Trix Trix 110 1 1 140 0.0 13.0 12 25
18 1 WheatiesHoneyGold WHG 110 2 1 200 1.0 16.0 8 60
19 2 Cheaties Chea 100 3 1 200 3.0 17.0 3 110
20 2 Cheerios Chee 110 6 2 290 2.0 17.0 1 105
21 2 CornFlakes CF 100 2 0 290 1.0 21.0 2 35
22 2 Crispix Cris 110 2 0 220 1.0 21.0 3 30
23 2 FrostedMiniWheats FMW 100 3 0 0 3.0 14.0 7 100
24 2 Kix Kix 110 2 1 260 0.0 21.0 3 40
25 2 MultiGrainCheerios MGC 100 2 1 220 2.0 15.0 6 90
26 2 NutriGrainWheat NGW 90 3 0 170 3.0 18.0 2 90
27 2 Product19 P19 100 3 0 320 1.0 20.0 3 45
28 2 PuffedRice PR 50 1 0 0 0.0 13.0 0 15
29 2 PuffedWheat PW 50 2 0 0 1.0 10.0 0 50
30 2 RiceKrispies RK 110 2 0 290 0.0 22.0 3 35
31 2 SpecialK SK 110 6 0 230 1.0 16.0 3 55
32 2 TotalCornFlakes TCF 110 2 1 200 0.0 21.0 3 35
33 2 TotalWholeGrain TWG 100 3 1 200 3.0 16.0 3 110
34 3 AllBran AB 70 4 1 260 9.0 7.0 5 320
35 3 CracklinOatBran COB 110 3 3 140 4.0 10.0 7 160
36 3 FruitfulBran FB 120 3 0 240 5.0 14.0 12 190
37 3 Life Life 100 4 2 150 2.0 12.0 6 95
38 3 MueslixCrispyBlend MCB 160 3 2 150 3.0 17.0 13 160
39 3 NutriGrainAlmondRaisin NGAR 140 3 2 220 3.0 21.0 7 130
40 3 QuakerOatmeal QO 100 5 2 0 2.7 1.0 1 110
41 3 RaisinBran RB 120 3 1 210 5.0 14.0 12 240
42 3 RaisinNutBran RNB 100 3 2 140 2.5 10.5 8 140
43 3 TotalRaisinBran TRB 140 3 1 190 4.0 15.0 14 230

 


 
Breakfast Cereals - J&W Table 11.9
Principal Components Analysis

The PRINCOMP Procedure

Observations 43
Variables 8
 
Simple Statistics
  Calories Protein Fat Sodium Fiber Carbohydrates Sugar Potassium
Mean 107.9069767 2.465116279 0.9767441860 180.4651163 1.713953488 14.25581395 7.604651163 84.41860465
StD 18.9684123 1.221802673 0.8014383526 79.2134996 1.799283111 4.25716776 4.536306106 66.10966090
 
Correlation Matrix
  Calories Protein Fat Sodium Fiber Carbohydrates Sugar Potassium
Calories 1.0000 0.0327 0.3883 0.3366 -.0194 0.2559 0.5795 0.1414
Protein 0.0327 1.0000 0.2058 0.0937 0.5125 -.0761 -.3999 0.5001
Fat 0.3883 0.2058 1.0000 0.0096 0.1637 -.3227 0.1873 0.3121
Sodium 0.3366 0.0937 0.0096 1.0000 0.0434 0.5673 -.0488 0.1144
Fiber -.0194 0.5125 0.1637 0.0434 1.0000 -.2408 -.0343 0.9288
Carbohydrates 0.2559 -.0761 -.3227 0.5673 -.2408 1.0000 -.3152 -.2232
Sugar 0.5795 -.3999 0.1873 -.0488 -.0343 -.3152 1.0000 0.0814
Potassium 0.1414 0.5001 0.3121 0.1144 0.9288 -.2232 0.0814 1.0000
 
Eigenvalues of the Correlation Matrix
  Eigenvalue Difference Proportion Cumulative
1 2.54756944 0.69280765 0.3184 0.3184
2 1.85476179 0.08593184 0.2318 0.5503
3 1.76882996 0.90054660 0.2211 0.7714
4 0.86828336 0.37098255 0.1085 0.8799
5 0.49730081 0.13987236 0.0622 0.9421
6 0.35742845 0.29692509 0.0447 0.9868
7 0.06050337 0.01518055 0.0076 0.9943
8 0.04532281   0.0057 1.0000
 
Eigenvectors
  Prin1 Prin2 Prin3 Prin4 Prin5 Prin6 Prin7 Prin8
Calories 0.114288 0.656598 0.135123 -.100479 0.453703 0.135781 -.468935 -.288583
Protein 0.420836 -.227941 0.253063 -.371677 0.551363 -.420374 0.242315 0.171241
Fat 0.315690 0.301787 -.165646 -.687460 -.431966 0.251310 0.125059 0.209366
Sodium 0.022537 0.273076 0.591711 0.075408 -.501848 -.555796 -.086933 -.028197
Fiber 0.558123 -.134670 0.069649 0.376351 -.075699 0.194259 -.452959 0.524626
Carbohydrates -.238093 0.113458 0.631592 0.058778 0.131506 0.534840 0.369793 0.296615
Sugar 0.038335 0.565922 -.362068 0.374915 0.123791 -.253640 0.435076 0.371024
Potassium 0.583102 -.000102 0.072717 0.296338 -.115579 0.211327 0.408627 -.584717